def get_godambe(func_ex, all_boot, p0, data, eps, log=True):
    """
    Godambe information matrix and Hessian for a fitted model.

    func_ex: Model function (per the original notes, a dadi extrapolated
             function), called as func_ex(params, ns, grid_pts).
    all_boot: Bootstrapped data sets; each one is wrapped in Spectrum before
              it is used.
    p0: Best-fit parameters for the data/func_ex combination. The last
        element is assumed to be theta; the remaining elements are the
        demographic model parameters.
    data: Original data, scored with Inference.ll.
    eps: Step size forwarded to get_hess and get_grad.
    log: If True, derivatives are taken with respect to the logarithms of
         the parameters; otherwise with respect to the parameters themselves.

    Returns (godambe, hess). hess is the negated result of get_hess for the
    original data, and godambe = hess * J^-1 * hess, where J is the average
    over bootstraps of the outer product of the gradient with itself.

    Note: ns and grid_pts are not arguments; they are looked up in the
    enclosing (module) scope.
    """
    def start_point():
        # Rebuilt for every call so get_hess/get_grad always receive the
        # same kind of object the original code handed them.
        return numpy.log(p0) if log else p0

    def make_ll(target):
        # Bind the target spectrum explicitly so each likelihood closure
        # keeps its own data set.
        def ll_func(params):
            if log:
                params = numpy.exp(params)
            model = params[-1] * func_ex(params[:-1], ns, grid_pts)
            return Inference.ll(model, target)
        return ll_func

    # Only the Hessian for the requested parameterization is evaluated; the
    # previous code always computed the linear-scale Hessian first and then
    # discarded it when log was True.
    hess = -get_hess(make_ll(data), start_point(), eps)

    J = numpy.zeros((len(p0), len(p0)))
    for boot in all_boot:
        grad_temp = get_grad(make_ll(Spectrum(boot)), start_point(), eps)
        J = J + numpy.outer(grad_temp, grad_temp)
    J = J / len(all_boot)
    J_inv = numpy.linalg.inv(J)
    # G = H*J^-1*H
    godambe = numpy.dot(numpy.dot(hess, J_inv), hess)
    return godambe, hess
def _object_func(params, data, model_func, sel_dist, theta,
                 lower_bound=None, upper_bound=None, verbose=0,
                 multinom=False, flush_delay=0, func_args=[], func_kwargs={},
                 fixed_params=None, ll_scale=1, output_stream=sys.stdout,
                 store_thetas=False):
    """
    Objective function for optimization.

    Returns the negated log-likelihood of data under the model spectrum,
    divided by ll_scale. Parameter sets that violate a bound, and
    likelihoods that evaluate to NaN, are scored with _out_of_bounds_val
    in place of the likelihood.

    params: Free parameter values proposed by the optimizer.
    data: Data spectrum; its sample_sizes are handed to model_func.
    model_func: Called as model_func(params, ns, sel_dist, theta, *func_args,
                **func_kwargs) to produce the model spectrum.
    sel_dist, theta: Passed through to model_func unchanged.
    lower_bound, upper_bound: Optional per-parameter limits. A None entry
                              leaves that parameter unbounded on that side.
    verbose: If > 0, write a progress line every verbose calls.
    multinom: If True, score with Inference.ll_multinom, else Inference.ll.
    flush_delay: Passed to Misc.delayed_flush after each progress line.
    func_args, func_kwargs: Extra arguments appended to the model_func call.
    fixed_params: Passed to Inference._project_params_up to rebuild the full
                  parameter list from params.
    ll_scale: Divisor applied to the returned value.
    output_stream: Stream that receives the progress lines.
    store_thetas: If True, record optimal_sfs_scaling(sfs, data) in
                  _theta_store under the key tuple(params).
    """
    global _counter, _theta_store
    _counter += 1

    # Expand the optimizer's parameter vector to include fixed parameters.
    full_params = Inference._project_params_up(params, fixed_params)

    # Reject any point that falls outside the requested bounds.
    if lower_bound is not None and any(
            limit is not None and value < limit
            for value, limit in zip(full_params, lower_bound)):
        return -_out_of_bounds_val / ll_scale
    if upper_bound is not None and any(
            limit is not None and value > limit
            for value, limit in zip(full_params, upper_bound)):
        return -_out_of_bounds_val / ll_scale

    model_args = [full_params, data.sample_sizes, sel_dist, theta]
    model_args.extend(func_args)
    sfs = model_func(*model_args, **func_kwargs)

    likelihood = Inference.ll_multinom if multinom else Inference.ll
    result = likelihood(sfs, data)

    if store_thetas:
        _theta_store[tuple(params)] = optimal_sfs_scaling(sfs, data)

    # Bad result
    if numpy.isnan(result):
        result = _out_of_bounds_val

    if verbose > 0 and _counter % verbose == 0:
        formatted = ', '.join('%- 12g' % value for value in full_params)
        param_str = 'array([%s])' % formatted
        output_stream.write('%-8i, %-12g, %s%s' % (_counter, result,
                                                   param_str, os.linesep))
        Misc.delayed_flush(delay=flush_delay)

    return -result / ll_scale
def plot_1d_comp_Poisson(model, data, fig_num=None, residual='Anscombe',
                         plot_masked=False, show=True):
    """
    Poisson comparison between 1d model and data.

    The upper panel shows data (blue) and model (red) on a log scale; the
    lower panel shows the residuals between them.

    model: 1-dimensional model SFS
    data: 1-dimensional data SFS
    fig_num: Use figure fig_num for display. If None, the current figure
             (pylab.gcf()) is used. The figure is cleared either way.
    residual: 'Anscombe' for Anscombe residuals, which are more normally
              distributed for Poisson sampling. 'linear' for the linear
              residuals, which can be less biased.
    plot_masked: Additionally plots (in open circles) results for points in
                 the model or data that were masked.
    show: If True, execute pylab.show command to make sure plot displays.

    Raises ValueError if residual is neither 'Anscombe' nor 'linear'.
    """
    if fig_num is not None:
        pylab.figure(fig_num, figsize=(7, 7))
    else:
        pylab.gcf()
    pylab.clf()

    # A folded data spectrum can only be compared with a folded model.
    if data.folded and not model.folded:
        model = model.fold()

    masked_model, masked_data = Numerics.intersect_masks(model, data)

    ax = pylab.subplot(2, 1, 1)
    series = ((masked_data, 'ob', 'data'), (masked_model, 'or', 'model'))
    for spectrum, marker, label in series:
        pylab.semilogy(spectrum, '-' + marker, label=label)
    if plot_masked:
        # Underlying .data ignores the mask, so masked entries show up too.
        for spectrum, marker, _ in series:
            pylab.semilogy(spectrum.data, '--' + marker, mfc='w',
                           zorder=-100)
    ax.legend(loc='upper right')

    pylab.subplot(2, 1, 2, sharex=ax)
    if residual == 'Anscombe':
        resid = Inference.Anscombe_Poisson_residual(masked_model,
                                                    masked_data)
    elif residual == 'linear':
        resid = Inference.linear_Poisson_residual(masked_model, masked_data)
    else:
        raise ValueError("Unknown class of residual '%s'." % residual)

    pylab.plot(resid, '-og')
    if plot_masked:
        pylab.plot(resid.data, '--og', mfc='w', zorder=-100)

    ax.set_xlim(0, data.shape[0] - 1)
    if show:
        pylab.show()
def optimize(p0, data1, data2, cache1, cache2, model_func, sel_dist,
             scal_fac1, scal_fac2, theta1, theta2, lower_bound=None,
             upper_bound=None, verbose=0, flush_delay=0.5, epsilon=1e-3,
             gtol=1e-5, multinom=False, maxiter=None, full_output=False,
             func_args=[], func_kwargs={}, fixed_params=None, ll_scale=1,
             output_file=None):
    """
    Optimize parameters jointly for two data sets with BFGS.

    Minimizes _object_func with scipy.optimize.fmin_bfgs, starting from p0.

    p0: Initial parameters. Entries fixed by fixed_params are removed before
        optimization and restored in the result.
    data1, data2, cache1, cache2, model_func, sel_dist, scal_fac1, scal_fac2,
    theta1, theta2, lower_bound, upper_bound, verbose, multinom, flush_delay,
    func_args, func_kwargs, fixed_params, ll_scale:
        Forwarded positionally, in that order, to _object_func.
    epsilon: Finite-difference step handed to fmin_bfgs.
    gtol: Gradient tolerance handed to fmin_bfgs.
    maxiter: Maximum number of iterations handed to fmin_bfgs.
    full_output: If False, return [-fopt, xopt]. If True, return
                 (xopt, fopt, gopt, Bopt, func_calls, grad_calls, warnflag)
                 as reported by fmin_bfgs.
    output_file: If given, progress output is written to this file instead
                 of sys.stdout.
    """
    # open() works on both Python 2 and 3; the file() builtin is Python 2
    # only.
    if output_file:
        output_stream = open(output_file, 'w')
    else:
        output_stream = sys.stdout

    try:
        # NOTE(review): this tuple must line up positionally with the
        # parameters of _object_func after `params`; confirm that the
        # _object_func paired with this optimizer takes a single
        # fixed_params argument.
        args = (data1, data2, cache1, cache2, model_func, sel_dist,
                scal_fac1, scal_fac2, theta1, theta2, lower_bound,
                upper_bound, verbose, multinom, flush_delay, func_args,
                func_kwargs, fixed_params, ll_scale, output_stream)

        p0 = Inference._project_params_down(p0, fixed_params)
        outputs = scipy.optimize.fmin_bfgs(_object_func, p0,
                                           epsilon=epsilon, args=args,
                                           gtol=gtol, full_output=True,
                                           disp=False, maxiter=maxiter)
    finally:
        # Close the log file even when the optimizer raises.
        if output_file:
            output_stream.close()

    xopt, fopt, gopt, Bopt, func_calls, grad_calls, warnflag = outputs
    xopt = Inference._project_params_up(xopt, fixed_params)

    if not full_output:
        return [-fopt, xopt]
    return xopt, fopt, gopt, Bopt, func_calls, grad_calls, warnflag
def LRT_adjust(func_ex, grid_pts, all_boot, p0, data, nested_indices,
               multinom=True, eps=0.01):
    """
    First-order moment matching adjustment factor for likelihood ratio test

    func_ex: Model function for complex model
    grid_pts: Grid points at which to evaluate func_ex
    all_boot: List of bootstrap frequency spectra
    p0: Best-fit parameters for the simple model, with nested parameter
        explicitly defined. Although equal to values for simple model, should
        be in a list form that can be taken in by the complex model you'd
        like to evaluate.
    data: Original data frequency spectrum
    nested_indices: List of positions of nested parameters in complex model
                    parameter list
    multinom: If True, assume model is defined without an explicit parameter
              for theta. The optimal theta for data is then appended to p0
              and the model is scaled by it.
    eps: Fractional stepsize to use when taking finite-difference
         derivatives. Note that if eps*param is < 1e-6, then the step size
         for that parameter will simply be eps, to avoid numerical issues
         with small parameter perturbations.

    Returns len(nested_indices) / trace(J * H^-1), with H and J as returned
    by get_godambe.
    """
    # XXX: Need to implement boot_theta_adjusts
    if multinom:
        func_multi = func_ex
        unscaled = func_multi(p0, data.sample_sizes, grid_pts)
        p0 = list(p0) + [Inference.optimal_sfs_scaling(unscaled, data)]

        def theta_scaled(p, ns, pts):
            # The last parameter is theta; it multiplies the unscaled model.
            return p[-1] * func_multi(p[:-1], ns, pts)
        func_ex = theta_scaled

    def diff_func(diff_params, ns, grid_pts):
        # Derivatives are only needed for the parameters of the complex
        # model that the simple model pins to fixed values. diff_params
        # carries just those; every other entry is taken from p0.
        full_params = numpy.array(p0, copy=True, dtype=float)
        full_params[nested_indices] = diff_params
        return func_ex(full_params, ns, grid_pts)

    nested_values = numpy.asarray(p0)[nested_indices]
    _, H, J, _ = get_godambe(diff_func, grid_pts, all_boot, nested_values,
                             data, eps, log=False)

    H_inv = numpy.linalg.inv(H)
    return len(nested_indices) / numpy.trace(numpy.dot(J, H_inv))
def func(params, data, theta_adjust=1):
    """
    Log-likelihood of data for params, caching model evaluations.

    cache, func_ex, ns and grid_pts are not arguments; they come from the
    enclosing scope. The model for a given (params, ns, grid_pts) key is
    computed once and reused afterwards.

    theta_adjust: Factor applied to the cached model before scoring. It
                  deals with bootstraps that need different thetas.
    """
    key = (tuple(params), tuple(ns), tuple(grid_pts))
    try:
        model = cache[key]
    except KeyError:
        model = cache[key] = func_ex(params, ns, grid_pts)
    return Inference.ll(theta_adjust * model, data)
def FIM_uncert(func_ex, grid_pts, p0, data, log=False, multinom=True,
               eps=0.01):
    """
    Parameter uncertainties from Fisher Information Matrix

    Returns standard deviations of parameter values.

    func_ex: Model function
    grid_pts: Grid points at which to evaluate func_ex
    p0: Best-fit parameters for func_ex
    data: Original data frequency spectrum
    log: If True, assume log-normal distribution of parameters. Returned
         values are then the standard deviations of the *logs* of the
         parameter values, which can be interpreted as relative parameter
         uncertainties.
    multinom: If True, assume model is defined without an explicit parameter
              for theta. Because uncertainty in theta must be accounted for
              to get correct uncertainties for other parameters, this
              function will automatically consider theta if multinom=True.
              In that case, the final entry of the returned uncertainties
              will correspond to theta.
    eps: Fractional stepsize to use when taking finite-difference
         derivatives
    """
    if multinom:
        func_multi = func_ex
        unscaled = func_multi(p0, data.sample_sizes, grid_pts)
        p0 = list(p0) + [Inference.optimal_sfs_scaling(unscaled, data)]

        def theta_scaled(p, ns, pts):
            # The last parameter is theta; it multiplies the unscaled model.
            return p[-1] * func_multi(p[:-1], ns, pts)
        func_ex = theta_scaled

    # No bootstraps are needed: only the Hessian is requested.
    H = get_godambe(func_ex, grid_pts, [], p0, data, eps, log,
                    just_hess=True)
    covariance = numpy.linalg.inv(H)
    return numpy.sqrt(numpy.diag(covariance))
def plot_3d_comp_multinom(model, data, vmin=None, vmax=None,
                          resid_range=None, fig_num=None, pop_ids=None,
                          residual='Anscombe', adjust=True):
    """
    Multinomial comparison between 3d model and data.

    The model is first rescaled with Inference.optimally_scaled_sfs so that
    it optimally fits the data, then handed to plot_3d_comp_Poisson together
    with all remaining arguments.

    model: 3-dimensional model SFS
    data: 3-dimensional data SFS
    vmin, vmax: Minimum and maximum values plotted for sfs are vmin and
                vmax respectively.
    resid_range: Residual plot saturates at +- resid_range.
    fig_num: Figure number forwarded to plot_3d_comp_Poisson.
    pop_ids: If not None, override pop_ids stored in Spectrum.
    residual: 'Anscombe' for Anscombe residuals, which are more normally
              distributed for Poisson sampling. 'linear' for the linear
              residuals, which can be less biased.
    adjust: Should method use automatic 'subplots_adjust'? For advanced
            manipulation of plots, it may be useful to make this False.
    """
    scaled_model = Inference.optimally_scaled_sfs(model, data)
    plot_options = dict(vmin=vmin, vmax=vmax, resid_range=resid_range,
                        fig_num=fig_num, pop_ids=pop_ids, residual=residual,
                        adjust=adjust)
    plot_3d_comp_Poisson(scaled_model, data, **plot_options)
def plot_1d_comp_multinom(model, data, fig_num=None, residual='Anscombe',
                          plot_masked=False):
    """
    Multinomial comparison between 1d model and data.

    The model is first rescaled with Inference.optimally_scaled_sfs so that
    it optimally fits the data, then handed to plot_1d_comp_Poisson.

    model: 1-dimensional model SFS
    data: 1-dimensional data SFS
    fig_num: Figure number forwarded to plot_1d_comp_Poisson.
    residual: 'Anscombe' for Anscombe residuals, which are more normally
              distributed for Poisson sampling. 'linear' for the linear
              residuals, which can be less biased.
    plot_masked: Additionally plots (in open circles) results for points in
                 the model or data that were masked.
    """
    scaled_model = Inference.optimally_scaled_sfs(model, data)
    plot_1d_comp_Poisson(scaled_model, data, fig_num=fig_num,
                         residual=residual, plot_masked=plot_masked)
def GIM_uncert(func_ex, grid_pts, all_boot, p0, data, log=False,
               multinom=True, eps=0.01, return_GIM=False,
               boot_theta_adjusts=None):
    """
    Parameter uncertainties from Godambe Information Matrix (GIM)

    Returns standard deviations of parameter values.

    func_ex: Model function
    grid_pts: Grid points at which to evaluate func_ex
    all_boot: List of bootstrap frequency spectra
    p0: Best-fit parameters for func_ex
    data: Original data frequency spectrum
    log: If True, assume log-normal distribution of parameters. Returned
         values are then the standard deviations of the *logs* of the
         parameter values, which can be interpreted as relative parameter
         uncertainties.
    multinom: If True, assume model is defined without an explicit parameter
              for theta. Because uncertainty in theta must be accounted for
              to get correct uncertainties for other parameters, this
              function will automatically consider theta if multinom=True.
              In that case, the final entry of the returned uncertainties
              will correspond to theta.
    eps: Fractional stepsize to use when taking finite-difference
         derivatives. Note that if eps*param is < 1e-6, then the step size
         for that parameter will simply be eps, to avoid numerical issues
         with small parameter perturbations.
    return_GIM: If true, also return the full GIM.
    boot_theta_adjusts: Optionally, a sequence of *relative* values of theta
                        (compared to original data) to assume for bootstrap
                        data sets. Only valid when multinom=False.

    Raises ValueError if boot_theta_adjusts is given with multinom=True.
    """
    if multinom:
        if boot_theta_adjusts:
            raise ValueError('boot_thetas option can only be used with '
                             'multinom=False')
        func_multi = func_ex
        unscaled = func_multi(p0, data.sample_sizes, grid_pts)
        p0 = list(p0) + [Inference.optimal_sfs_scaling(unscaled, data)]

        def theta_scaled(p, ns, pts):
            # The last parameter is theta; it multiplies the unscaled model.
            return p[-1] * func_multi(p[:-1], ns, pts)
        func_ex = theta_scaled

    GIM, H, J, cU = get_godambe(func_ex, grid_pts, all_boot, p0, data, eps,
                                log, boot_theta_adjusts=boot_theta_adjusts)
    uncerts = numpy.sqrt(numpy.diag(numpy.linalg.inv(GIM)))

    if return_GIM:
        return uncerts, GIM
    return uncerts
def optimize(p0, data, model_func, sel_dist, theta, lower_bound=None,
             upper_bound=None, verbose=0, flush_delay=0.5, epsilon=1e-3,
             gtol=1e-5, multinom=False, maxiter=None, full_output=False,
             func_args=[], func_kwargs={}, fixed_params=None, ll_scale=1,
             output_file=None):
    """
    optimizer for use with distributions where log transformations do not
    work, e.g. when gamma is positive and negative

    Minimizes _object_func with scipy.optimize.fmin_bfgs, starting from p0.

    p0: Initial parameters. Entries fixed by fixed_params are removed before
        optimization and restored in the result.
    data, model_func, sel_dist, theta, lower_bound, upper_bound, verbose,
    multinom, flush_delay, func_args, func_kwargs, fixed_params, ll_scale:
        Forwarded positionally, in that order, to _object_func.
    epsilon: Finite-difference step handed to fmin_bfgs.
    gtol: Gradient tolerance handed to fmin_bfgs.
    maxiter: Maximum number of iterations handed to fmin_bfgs.
    full_output: If False, return only xopt. If True, return
                 (xopt, fopt, gopt, Bopt, func_calls, grad_calls, warnflag)
                 as reported by fmin_bfgs.
    output_file: If given, progress output is written to this file instead
                 of sys.stdout.
    """
    # open() works on both Python 2 and 3; the file() builtin is Python 2
    # only.
    if output_file:
        output_stream = open(output_file, 'w')
    else:
        output_stream = sys.stdout

    try:
        args = (data, model_func, sel_dist, theta, lower_bound, upper_bound,
                verbose, multinom, flush_delay, func_args, func_kwargs,
                fixed_params, ll_scale, output_stream)

        # Qualified like the matching _project_params_up call below, so the
        # two projections come from the same module.
        p0 = Inference._project_params_down(p0, fixed_params)
        outputs = scipy.optimize.fmin_bfgs(_object_func, p0,
                                           epsilon=epsilon, args=args,
                                           gtol=gtol, full_output=True,
                                           disp=False, maxiter=maxiter)
    finally:
        # Close the log file even when the optimizer raises.
        if output_file:
            output_stream.close()

    xopt, fopt, gopt, Bopt, func_calls, grad_calls, warnflag = outputs
    xopt = Inference._project_params_up(xopt, fixed_params)

    if not full_output:
        return xopt
    return xopt, fopt, gopt, Bopt, func_calls, grad_calls, warnflag
def plot_1d_comp_Poisson(model, data, fig_num=None, residual='Anscombe',
                         plot_masked=False):
    """
    Poisson comparison between 1d model and data.

    The upper panel shows data (blue) and model (red) on a log scale; the
    lower panel shows the residuals between them. pylab.show() is always
    called at the end.

    model: 1-dimensional model SFS
    data: 1-dimensional data SFS
    fig_num: Use figure fig_num for display. If None, the current figure
             (pylab.gcf()) is used. The figure is cleared either way.
    residual: 'Anscombe' for Anscombe residuals, which are more normally
              distributed for Poisson sampling. 'linear' for the linear
              residuals, which can be less biased.
    plot_masked: Additionally plots (in open circles) results for points in
                 the model or data that were masked.

    Raises ValueError if residual is neither 'Anscombe' nor 'linear'.
    """
    if fig_num is not None:
        pylab.figure(fig_num, figsize=(7,7))
    else:
        pylab.gcf()
    pylab.clf()

    # A folded data spectrum can only be compared with a folded model.
    if data.folded and not model.folded:
        model = model.fold()

    masked_model, masked_data = Numerics.intersect_masks(model, data)

    ax = pylab.subplot(2,1,1)
    series = ((masked_data, 'ob'), (masked_model, 'or'))
    for spectrum, marker in series:
        pylab.semilogy(spectrum, '-' + marker)
    if plot_masked:
        # Underlying .data ignores the mask, so masked entries show up too.
        for spectrum, marker in series:
            pylab.semilogy(spectrum.data, '--' + marker, mfc='w',
                           zorder=-100)

    pylab.subplot(2,1,2, sharex=ax)
    if residual == 'Anscombe':
        resid = Inference.Anscombe_Poisson_residual(masked_model,
                                                    masked_data)
    elif residual == 'linear':
        resid = Inference.linear_Poisson_residual(masked_model, masked_data)
    else:
        raise ValueError("Unknown class of residual '%s'." % residual)

    pylab.plot(resid, '-og')
    if plot_masked:
        pylab.plot(resid.data, '--og', mfc='w', zorder=-100)

    ax.set_xlim(0, data.shape[0]-1)
    pylab.show()
def plot_2d_comp_Poisson(model, data, vmin=None, vmax=None, resid_range=None,
                         fig_num=None, pop_ids=None, residual='Anscombe',
                         adjust=True, saveplot=False,
                         nomplot="plot_2d_comp_Poisson", showplot=True):
    """
    Poisson comparison between 2d model and data.

    Draws four panels: data, model, residuals, and a histogram of the
    unmasked residuals.

    model: 2-dimensional model SFS
    data: 2-dimensional data SFS
    vmin, vmax: Minimum and maximum values plotted for sfs are vmin and
                vmax respectively. Each defaults to the corresponding
                extreme of the masked model and data.
    resid_range: Residual plot saturates at +- resid_range. Defaults to the
                 largest absolute residual.
    fig_num: Use figure fig_num for display. If None, the current figure
             (pylab.gcf()) is used. The figure is cleared either way.
    pop_ids: If not None, override pop_ids stored in Spectrum.
    residual: 'Anscombe' for Anscombe residuals, which are more normally
              distributed for Poisson sampling. 'linear' for the linear
              residuals, which can be less biased.
    adjust: Should method use automatic 'subplots_adjust'? For advanced
            manipulation of plots, it may be useful to make this False.
    saveplot: If True, save the figure to nomplot + ".png".
    nomplot: File name, without extension, used when saveplot is True.
    showplot: If True, call pylab.show() at the end.

    Raises ValueError if pop_ids is given but not of length 2, or if
    residual is neither 'Anscombe' nor 'linear'.
    """
    # A folded data spectrum can only be compared with a folded model.
    if data.folded and not model.folded:
        model = model.fold()

    masked_model, masked_data = Numerics.intersect_masks(model, data)

    if fig_num is None:
        pylab.gcf()
    else:
        pylab.figure(fig_num, figsize=(7,7))

    pylab.clf()
    if adjust:
        pylab.subplots_adjust(bottom=0.07, left=0.07, top=0.94, right=0.95,
                              hspace=0.26, wspace=0.26)

    max_toplot = max(masked_model.max(), masked_data.max())
    min_toplot = min(masked_model.min(), masked_data.min())
    if vmax is None:
        vmax = max_toplot
    if vmin is None:
        vmin = min_toplot
    # Colorbar extension depends on whether the plotted range covers the
    # values actually present.
    extend = _extend_mapping[vmin <= min_toplot, vmax >= max_toplot]

    if pop_ids is not None:
        data_pop_ids = model_pop_ids = resid_pop_ids = pop_ids
        if len(pop_ids) != 2:
            raise ValueError('pop_ids must be of length 2.')
    else:
        data_pop_ids = masked_data.pop_ids
        model_pop_ids = masked_model.pop_ids
        if masked_model.pop_ids is None:
            model_pop_ids = data_pop_ids

        # Residuals are only labelled when model and data agree on labels.
        if model_pop_ids == data_pop_ids:
            resid_pop_ids = model_pop_ids
        else:
            resid_pop_ids = None

    ax = pylab.subplot(2,2,1)
    plot_single_2d_sfs(masked_data, vmin=vmin, vmax=vmax,
                       pop_ids=data_pop_ids, colorbar=False)
    ax.set_title('data')

    ax2 = pylab.subplot(2,2,2, sharex=ax, sharey=ax)
    plot_single_2d_sfs(masked_model, vmin=vmin, vmax=vmax,
                       pop_ids=model_pop_ids, extend=extend)
    ax2.set_title('model')

    if residual == 'Anscombe':
        resid = Inference.Anscombe_Poisson_residual(masked_model,
                                                    masked_data, mask=vmin)
    elif residual == 'linear':
        resid = Inference.linear_Poisson_residual(masked_model, masked_data,
                                                  mask=vmin)
    else:
        raise ValueError("Unknown class of residual '%s'." % residual)

    if resid_range is None:
        resid_range = max((abs(resid.max()), abs(resid.min())))
    resid_extend = _extend_mapping[-resid_range <= resid.min(),
                                   resid_range >= resid.max()]

    ax3 = pylab.subplot(2,2,3, sharex=ax, sharey=ax)
    plot_2d_resid(resid, resid_range, pop_ids=resid_pop_ids,
                  extend=resid_extend)
    ax3.set_title('residuals')

    ax = pylab.subplot(2,2,4)
    # Keep only the residuals that are not masked.
    flatresid = numpy.compress(numpy.logical_not(resid.mask.ravel()),
                               resid.ravel())
    # 'normed' was removed from matplotlib's hist; 'density' is its
    # replacement and gives the same normalized histogram.
    ax.hist(flatresid, bins=20, density=True)
    ax.set_title('residuals')
    ax.set_yticks([])

    if saveplot:
        nomplot = nomplot + ".png"
        pylab.savefig(nomplot)
    if showplot:
        pylab.show()
def score_stat(func_ex, grid_pts, all_boot, p0, data, nested_indices,
               multinom=True, eps=0.01, adj_and_org=False):
    """
    Calculate test statistic from score test

    func_ex: Model function for complex model
    grid_pts: Grid points to evaluate model function
    all_boot: List of bootstrap frequency spectra
    p0: Best-fit parameters for the simple model, with nested parameter
        explicitly defined. Although equal to values for simple model, should
        be in a list form that can be taken in by the complex model you'd
        like to evaluate.
    data: Original data frequency spectrum
    nested_indices: List of positions of nested parameters in complex model
                    parameter list
    eps: Fractional stepsize to use when taking finite-difference
         derivatives
    multinom: If True, assume model is defined without an explicit parameter
              for theta. The optimal theta for data is then appended to p0
              and the model is scaled by it.
    adj_and_org: If False, return only adjusted score statistic. If True,
                 also return unadjusted statistic as second return value.
    """
    if multinom:
        func_multi = func_ex
        unscaled = func_multi(p0, data.sample_sizes, grid_pts)
        p0 = list(p0) + [Inference.optimal_sfs_scaling(unscaled, data)]

        def theta_scaled(p, ns, pts):
            # The last parameter is theta; it multiplies the unscaled model.
            return p[-1] * func_multi(p[:-1], ns, pts)
        func_ex = theta_scaled

    def diff_func(diff_params, ns, grid_pts):
        # Derivatives are only needed for the parameters of the complex
        # model that the simple model pins to fixed values. diff_params
        # carries just those; every other entry is taken from p0.
        full_params = numpy.array(p0, copy=True, dtype=float)
        full_params[nested_indices] = diff_params
        return func_ex(full_params, ns, grid_pts)

    nested_values = numpy.asarray(p0)[nested_indices]
    GIM, H, J, cU = get_godambe(diff_func, grid_pts, all_boot,
                                nested_values, data, eps, log=False)

    cU_t = numpy.transpose(cU)

    def quadratic_form(matrix):
        # cU^T * matrix^-1 * cU, reduced to its single entry.
        inverse = numpy.linalg.inv(matrix)
        return numpy.dot(numpy.dot(cU_t, inverse), cU)[0, 0]

    score_org = quadratic_form(H)
    score_adj = quadratic_form(J)

    if adj_and_org:
        return score_adj, score_org
    return score_adj
grad_temp = get_grad(func, numpy.log(p0), eps) J_temp = numpy.outer(grad_temp, grad_temp) J = J + J_temp J = J/len(all_boot) J_inv = numpy.linalg.inv(J) # G = H*J^-1*H godambe = numpy.dot(numpy.dot(hess, J_inv), hess) return godambe, hess def uncert(func_ex, all_boot, p0, data, eps, log=True): godambe, hess = get_godambe(func_ex, all_boot, p0, data, eps, log) return numpy.sqrt(numpy.diag(numpy.linalg.inv((godambe)))) def LRT(func_ex, all_boot, p0, data, eps, diff=1) #p0 is the best fit parameters in the simple model with the complex model parameter(s) as the first diff number of parameters in p0 adjust = 0.0 func = lambda param: Inference.ll_multinom(func_ex([param[:diff]+p0[diff:]], ns, grid_pts), data) H = -get_hess_log(func, p0[:diff], eps) J_boot = numpy.zeros([diff, diff]) J_array = [] for i in range(0, len(all_boot)): boot = Spectrum(all_boot[i]) func = lambda param: Inference.ll_multinom(func_ex([param[:diff]+p0[diff:]], ns, grid_pts), boot) cU_theta = get_grad_log(func, p0[:diff], eps) J_theta = numpy.outer(cU_theta, cU_theta) J_boot = J_boot + J_theta J_array.append(J_theta) J = J_boot/len(all_boot) adjust = diff/numpy.trace(numpy.dot(J, numpy.linalg.inv(H))) return adjust
maxiter=50) print('Optimized parameters', str(repr(popt))) model = func_ex(popt[0], ns, pts_l) ll_opt = dadi.Inference.ll_multinom(model, pop) print('Optimized log-likelihood:', str(ll_opt)) print('Theta0_2:', str(theta0)) ##Print out the scaled SFS and the Anscombe Poisson residuals print("==============================================") print(Inference.optimally_scaled_sfs(model, pop)) rescaled = Inference.optimally_scaled_sfs(model, pop) print(Inference.Anscombe_Poisson_residual(rescaled,pop)) dadi.Plotting.plot_1d_comp_multinom(model,pop) print("==============================================") ####Neutral Fit######### ########################### print("NEUTRAL MODEL") params = [1]
def optimize_cons(p0, data, model_func, sel_dist, theta, lower_bound=None,
                  upper_bound=None, verbose=0, flush_delay=0.5, epsilon=1e-4,
                  constraint=None, gtol=1e-6, multinom=False, maxiter=None,
                  full_output=False, func_args=[], func_kwargs={},
                  fixed_params=None, ll_scale=1, output_file=None):
    """
    Constrained optimization needs a constraint function and bounds.

    Minimizes _object_func with scipy.optimize.fmin_slsqp, starting from p0.

    p0: Initial parameters. Entries fixed by fixed_params are removed before
        optimization and restored in the result.
    data, model_func, sel_dist, theta, lower_bound, upper_bound, verbose,
    multinom, flush_delay, func_args, func_kwargs, fixed_params, ll_scale:
        Forwarded positionally, in that order, to _object_func.
    lower_bound, upper_bound: Also turned into the (low, high) pairs given
                              to fmin_slsqp. A missing bound, or a None
                              entry, is treated as unbounded on that side.
    epsilon: Finite-difference step handed to fmin_slsqp.
    constraint: Equality-constraint function, passed as f_eqcons.
    gtol: Accepted for signature compatibility; not used by this function.
    maxiter: Maximum number of iterations. If None, fmin_slsqp's own default
             is used.
    full_output: If False, return [-fopt, xopt]. If True, return the five
                 values reported by fmin_slsqp, with xopt projected back up.
    output_file: If given, progress output is written to this file instead
                 of sys.stdout.
    """
    # open() works on both Python 2 and 3; the file() builtin is Python 2
    # only.
    if output_file:
        output_stream = open(output_file, 'w')
    else:
        output_stream = sys.stdout

    try:
        args = (data, model_func, sel_dist, theta, lower_bound, upper_bound,
                verbose, multinom, flush_delay, func_args, func_kwargs,
                fixed_params, ll_scale, output_stream)

        # Always define the bounds. Previously they only existed when both
        # limits were supplied, so any other call failed with a NameError.
        # NOTE(review): the pairs cover the full parameter list while p0 is
        # projected down below; confirm the intended behavior when
        # fixed_params is used together with bounds.
        if lower_bound is None and upper_bound is None:
            bnds = ()
        else:
            n_bounds = len(upper_bound if lower_bound is None
                           else lower_bound)
            lows = [None] * n_bounds if lower_bound is None else lower_bound
            highs = [None] * n_bounds if upper_bound is None else upper_bound
            bnds = tuple((-numpy.inf if low is None else low,
                          numpy.inf if high is None else high)
                         for low, high in zip(lows, highs))

        # Only override the iteration limit when one was requested; passing
        # iter=None is not a valid iteration count.
        slsqp_options = {}
        if maxiter is not None:
            slsqp_options['iter'] = maxiter

        p0 = Inference._project_params_down(p0, fixed_params)
        outputs = scipy.optimize.fmin_slsqp(_object_func, p0, bounds=bnds,
                                            args=args, f_eqcons=constraint,
                                            epsilon=epsilon,
                                            full_output=True, disp=False,
                                            **slsqp_options)
    finally:
        # Close the log file even when the optimizer raises.
        if output_file:
            output_stream.close()

    # Names kept from the original code; fmin_slsqp returns five values
    # when full_output is set.
    xopt, fopt, func_calls, grad_calls, warnflag = outputs
    xopt = Inference._project_params_up(xopt, fixed_params)

    if not full_output:
        return [-fopt, xopt]
    return xopt, fopt, func_calls, grad_calls, warnflag
def _object_func(params, data1, data2, cache1, cache2, model_func, sel_dist,
                 scal_fac1, scal_fac2, theta1, theta2, lower_bound=None,
                 upper_bound=None, verbose=0, multinom=False, flush_delay=0,
                 func_args=[], func_kwargs={}, fixed_params1=None,
                 fixed_params2=None, ll_scale=1, output_stream=sys.stdout,
                 store_thetas=False):
    """
    Objective function for optimization.

    Returns the negated sum of the log-likelihoods of data1 and data2 under
    their model spectra, divided by ll_scale. Parameter sets that violate a
    bound, and likelihoods that evaluate to NaN, are scored with
    _out_of_bounds_val in place of the likelihood.

    params: Free parameter values proposed by the optimizer.
    data1, data2: Data spectra; their sample_sizes are handed to model_func.
    cache1, cache2: Passed to model_func for the respective data set.
    model_func: Called once per data set as model_func(params, ns, sel_dist,
                theta, cache, *func_args, **func_kwargs).
    sel_dist: Copied with copy_func, once with defaults=scal_fac1 and once
              with defaults=scal_fac2, so each data set gets its own scaling.
    scal_fac1, scal_fac2: Per the original notes, scal_fac1 should be 2*Nea
                          of pop 1 and scal_fac2 should be 4*Nea of pop 2.
    theta1, theta2: Passed through to model_func unchanged.
    lower_bound, upper_bound: Optional per-parameter limits, checked against
                              the parameters of the first data set only. A
                              None entry leaves that parameter unbounded.
    verbose: If > 0, write a progress line every verbose calls.
    multinom: If True, score with Inference.ll_multinom, else Inference.ll.
    flush_delay: Passed to Misc.delayed_flush after each progress line.
    fixed_params1, fixed_params2: Passed to Inference._project_params_up to
                                  rebuild each full parameter list.
    ll_scale: Divisor applied to the returned value.
    output_stream: Stream that receives the progress lines.
    store_thetas: Accepted but not used by this function.
    """
    global _counter
    _counter += 1

    # Scaling factors scale sel_dist differently for species 1 and species 2
    sel_dist1 = copy_func(sel_dist, defaults=scal_fac1)
    sel_dist2 = copy_func(sel_dist, defaults=scal_fac2)

    # Expand the optimizer's parameter vector to include fixed parameters.
    full_params1 = Inference._project_params_up(params, fixed_params1)
    full_params2 = Inference._project_params_up(params, fixed_params2)

    # Reject any point that falls outside the requested bounds.
    if lower_bound is not None and any(
            limit is not None and value < limit
            for value, limit in zip(full_params1, lower_bound)):
        return -_out_of_bounds_val / ll_scale
    if upper_bound is not None and any(
            limit is not None and value > limit
            for value, limit in zip(full_params1, upper_bound)):
        return -_out_of_bounds_val / ll_scale

    extra_args = list(func_args)
    model_args1 = [full_params1, data1.sample_sizes, sel_dist1, theta1,
                   cache1] + extra_args
    model_args2 = [full_params2, data2.sample_sizes, sel_dist2, theta2,
                   cache2] + extra_args

    sfs1 = model_func(*model_args1, **func_kwargs)
    sfs2 = model_func(*model_args2, **func_kwargs)

    likelihood = Inference.ll_multinom if multinom else Inference.ll
    result = likelihood(sfs1, data1) + likelihood(sfs2, data2)

    # Bad result
    if numpy.isnan(result):
        result = _out_of_bounds_val

    if verbose > 0 and _counter % verbose == 0:
        formatted = ', '.join('%- 12g' % value for value in full_params1)
        param_str = 'array([%s])' % formatted
        output_stream.write('%-8i, %-12g, %s%s' % (_counter, result,
                                                   param_str, os.linesep))
        Misc.delayed_flush(delay=flush_delay)

    return -result / ll_scale
def func(params, data):
    """
    Log-likelihood of data for params, caching model evaluations.

    cache, func_ex, ns and grid_pts are not arguments; they come from the
    enclosing scope. The model for a given (params, ns, grid_pts) key is
    computed once and reused afterwards.
    """
    key = (tuple(params), tuple(ns), tuple(grid_pts))
    if key in cache:
        return Inference.ll(cache[key], data)

    # First request for this parameter set: evaluate and remember the model.
    model = func_ex(params, ns, grid_pts)
    cache[key] = model
    return Inference.ll(model, data)
def Wald_stat(func_ex, grid_pts, all_boot, p0, data, nested_indices,
              full_params, multinom=True, eps=0.01, adj_and_org=False):
    """
    Calculate test statistic from Wald test

    func_ex: Model function for complex model
    grid_pts: Grid points at which to evaluate func_ex
    all_boot: List of bootstrap frequency spectra
    p0: Best-fit parameters for the simple model, with nested parameter
        explicitly defined. Although equal to values for simple model, should
        be in a list form that can be taken in by the complex model you'd
        like to evaluate.
    data: Original data frequency spectrum
    nested_indices: List of positions of nested parameters in complex model
                    parameter list
    full_params: Parameter values for parameters found only in complex
                 model. Can either be array with just values found only in
                 the complex model, or entire list of parameters from
                 complex model.
    multinom: If True, assume model is defined without an explicit parameter
              for theta. The optimal theta for data is then appended to p0
              (and to full_params when it has the same length as p0) and
              the model is scaled by it.
    eps: Fractional stepsize to use when taking finite-difference
         derivatives. Note that if eps*param is < 1e-6, then the step size
         for that parameter will simply be eps, to avoid numerical issues
         with small parameter perturbations.
    adj_and_org: If False, return only adjusted Wald statistic. If True,
                 also return unadjusted statistic as second return value.

    Raises KeyError if full_params matches neither p0 nor nested_indices in
    length.
    """
    # XXX: Implement boot_theta_adjusts
    if multinom:
        func_multi = func_ex
        unscaled = func_multi(p0, data.sample_sizes, grid_pts)
        theta_opt = Inference.optimal_sfs_scaling(unscaled, data)

        # Also need to extend full_params
        if len(full_params) == len(p0):
            full_params = numpy.concatenate((full_params, [theta_opt]))
        p0 = list(p0) + [theta_opt]

        def theta_scaled(p, ns, pts):
            # The last parameter is theta; it multiplies the unscaled model.
            return p[-1] * func_multi(p[:-1], ns, pts)
        func_ex = theta_scaled

    def diff_func(diff_params, ns, grid_pts):
        # Derivatives are only needed for the parameters of the complex
        # model that the simple model pins to fixed values. diff_params
        # carries just those; every other entry is taken from p0.
        model_params = numpy.array(p0, copy=True, dtype=float)
        model_params[nested_indices] = diff_params
        return func_ex(model_params, ns, grid_pts)

    # Reduce full params list to be same length as nested indices
    if len(full_params) == len(p0):
        full_params = numpy.asarray(full_params)[nested_indices]
    if len(full_params) != len(nested_indices):
        raise KeyError('Full parameters not equal in length to p0 or nested '
                       'indices')

    nested_values = numpy.asarray(p0)[nested_indices]
    GIM, H, J, cU = get_godambe(diff_func, grid_pts, all_boot,
                                nested_values, data, eps, log=False)

    param_diff = full_params - nested_values
    diff_t = numpy.transpose(param_diff)

    wald_adj = numpy.dot(numpy.dot(diff_t, GIM), param_diff)
    wald_org = numpy.dot(numpy.dot(diff_t, H), param_diff)

    if adj_and_org:
        return wald_adj, wald_org
    return wald_adj