def FIM_uncert(func_ex, grid_pts, p0, data, log=False, multinom=True,
               eps=0.01):
    """
    Parameter uncertainties from Fisher Information Matrix

    Returns standard deviations of parameter values, computed as the square
    roots of the diagonal of the inverse of the Hessian returned by
    get_godambe.

    func_ex: Model function
    grid_pts: Grid points at which to evaluate func_ex
    p0: Best-fit parameters for func_ex
    data: Original data frequency spectrum
    log: If True, assume log-normal distribution of parameters. Returned
         values are then the standard deviations of the *logs* of the
         parameter values, which can be interpreted as relative parameter
         uncertainties.
    multinom: If True, assume model is defined without an explicit parameter
              for theta. Because uncertainty in theta must be accounted for
              to get correct uncertainties for other parameters, this
              function will automatically consider theta if multinom=True.
              In that case, the final entry of the returned uncertainties
              will correspond to theta.
    eps: Fractional stepsize to use when taking finite-difference derivatives
    """
    if multinom:
        # Promote theta to an explicit trailing parameter: start it at the
        # optimal scaling of the unscaled model, then wrap the model so that
        # the last parameter multiplies the spectrum.
        base_func = func_ex
        unscaled = base_func(p0, data.sample_sizes, grid_pts)
        theta = Inference.optimal_sfs_scaling(unscaled, data)
        p0 = list(p0)
        p0.append(theta)

        def func_ex(p, ns, pts):
            return p[-1] * base_func(p[:-1], ns, pts)

    # No bootstrap spectra are needed when only the Hessian is requested.
    hessian = get_godambe(func_ex, grid_pts, [], p0, data, eps, log,
                          just_hess=True)
    hessian_inv = numpy.linalg.inv(hessian)
    return numpy.sqrt(numpy.diag(hessian_inv))
def LRT_adjust(func_ex, grid_pts, all_boot, p0, data, nested_indices,
               multinom=True, eps=0.01):
    # XXX: Need to implement boot_theta_adjusts
    """
    First-order moment matching adjustment factor for likelihood ratio test

    Returns len(nested_indices) / trace(J . H^-1), where H and J are the
    matrices returned by get_godambe for the nested parameters only.

    func_ex: Model function for complex model
    grid_pts: Grid points at which to evaluate func_ex
    all_boot: List of bootstrap frequency spectra
    p0: Best-fit parameters for the simple model, with nested parameter
        explicitly defined. Although equal to values for simple model, should
        be in a list form that can be taken in by the complex model you'd
        like to evaluate.
    data: Original data frequency spectrum
    nested_indices: List of positions of nested parameters in complex model
                    parameter list
    multinom: If True, assume model is defined without an explicit parameter
              for theta. Because uncertainty in theta must be accounted for
              to get correct uncertainties for other parameters, this
              function will automatically consider theta if multinom=True.
    eps: Fractional stepsize to use when taking finite-difference derivatives
         Note that if eps*param is < 1e-6, then the step size for that
         parameter will simply be eps, to avoid numerical issues with small
         parameter perturbations.
    """
    if multinom:
        # Promote theta to an explicit trailing parameter of the model.
        base_func = func_ex
        unscaled = base_func(p0, data.sample_sizes, grid_pts)
        theta = Inference.optimal_sfs_scaling(unscaled, data)
        p0 = list(p0)
        p0.append(theta)

        def func_ex(p, ns, pts):
            return p[-1] * base_func(p[:-1], ns, pts)

    # Derivatives are only needed with respect to the parameters of the
    # complex model that were pinned to specified values in the simple model.
    def diff_func(diff_params, ns, grid_pts):
        # Only the nested parameters vary; everything else is taken from p0.
        params = numpy.array(p0, copy=True, dtype=float)
        # Numpy fancy indexing overwrites just the nested positions.
        params[nested_indices] = diff_params
        return func_ex(params, ns, grid_pts)

    p_nested = numpy.asarray(p0)[nested_indices]
    _gim, hess, J_mat, _score = get_godambe(diff_func, grid_pts, all_boot,
                                            p_nested, data, eps, log=False)

    J_hess_inv = numpy.dot(J_mat, numpy.linalg.inv(hess))
    n_nested = len(nested_indices)
    return n_nested / numpy.trace(J_hess_inv)
def GIM_uncert(func_ex, grid_pts, all_boot, p0, data, log=False,
               multinom=True, eps=0.01, return_GIM=False,
               boot_theta_adjusts=None):
    """
    Parameter uncertainties from Godambe Information Matrix (GIM)

    Returns standard deviations of parameter values, computed as the square
    roots of the diagonal of the inverse of the GIM. If return_GIM is True,
    returns the tuple (uncertainties, GIM) instead.

    Raises ValueError if a non-empty boot_theta_adjusts is given together
    with multinom=True.

    func_ex: Model function
    grid_pts: Grid points at which to evaluate func_ex
    all_boot: List of bootstrap frequency spectra
    p0: Best-fit parameters for func_ex
    data: Original data frequency spectrum
    log: If True, assume log-normal distribution of parameters. Returned
         values are then the standard deviations of the *logs* of the
         parameter values, which can be interpreted as relative parameter
         uncertainties.
    multinom: If True, assume model is defined without an explicit parameter
              for theta. Because uncertainty in theta must be accounted for
              to get correct uncertainties for other parameters, this
              function will automatically consider theta if multinom=True.
              In that case, the final entry of the returned uncertainties
              will correspond to theta.
    eps: Fractional stepsize to use when taking finite-difference
         derivatives. Note that if eps*param is < 1e-6, then the step size
         for that parameter will simply be eps, to avoid numerical issues
         with small parameter perturbations.
    return_GIM: If true, also return the full GIM.
    boot_theta_adjusts: Optionally, a sequence of *relative* values of theta
                        (compared to original data) to assume for bootstrap
                        data sets. Only valid when multinom=False.
    """
    if multinom:
        # Test emptiness explicitly instead of relying on truthiness:
        # truth-testing a multi-element numpy array raises an unrelated
        # "ambiguous truth value" error, and a one-element array such as
        # array([0.]) would be silently accepted.
        if boot_theta_adjusts is not None and len(boot_theta_adjusts) > 0:
            raise ValueError('boot_thetas option can only be used with '
                             'multinom=False')

        # Promote theta to an explicit trailing parameter: start it at the
        # optimal scaling of the unscaled model, then wrap the model so that
        # the last parameter multiplies the spectrum.
        func_multi = func_ex
        model = func_multi(p0, data.sample_sizes, grid_pts)
        theta_opt = Inference.optimal_sfs_scaling(model, data)
        p0 = list(p0) + [theta_opt]

        def func_ex(p, ns, pts):
            return p[-1] * func_multi(p[:-1], ns, pts)

    GIM, H, J, cU = get_godambe(func_ex, grid_pts, all_boot, p0, data, eps,
                                log, boot_theta_adjusts=boot_theta_adjusts)
    uncerts = numpy.sqrt(numpy.diag(numpy.linalg.inv(GIM)))
    if return_GIM:
        return uncerts, GIM
    return uncerts
def Wald_stat(func_ex, grid_pts, all_boot, p0, data, nested_indices,
              full_params, multinom=True, eps=0.01, adj_and_org=False):
    # XXX: Implement boot_theta_adjusts
    """
    Calculate test statistic from Wald test

    Returns the adjusted Wald statistic (quadratic form of the parameter
    difference with the GIM). If adj_and_org is True, returns the tuple
    (adjusted, unadjusted), where the unadjusted statistic uses H instead.

    Raises KeyError if full_params matches neither p0 nor nested_indices in
    length.

    func_ex: Model function for complex model
    grid_pts: Grid points at which to evaluate func_ex
    all_boot: List of bootstrap frequency spectra
    p0: Best-fit parameters for the simple model, with nested parameter
        explicitly defined. Although equal to values for simple model, should
        be in a list form that can be taken in by the complex model you'd
        like to evaluate.
    data: Original data frequency spectrum
    nested_indices: List of positions of nested parameters in complex model
                    parameter list
    full_params: Parameter values for parameters found only in complex
                 model. Can either be array with just values found only in
                 the complex model, or entire list of parameters from
                 complex model.
    multinom: If True, assume model is defined without an explicit parameter
              for theta. Because uncertainty in theta must be accounted for
              to get correct uncertainties for other parameters, this
              function will automatically consider theta if multinom=True.
    eps: Fractional stepsize to use when taking finite-difference derivatives
         Note that if eps*param is < 1e-6, then the step size for that
         parameter will simply be eps, to avoid numerical issues with small
         parameter perturbations.
    adj_and_org: If False, return only adjusted Wald statistic. If True,
                 also return unadjusted statistic as second return value.
    """
    if multinom:
        # Promote theta to an explicit trailing parameter of the model.
        base_func = func_ex
        unscaled = base_func(p0, data.sample_sizes, grid_pts)
        theta = Inference.optimal_sfs_scaling(unscaled, data)
        # A full-length full_params must gain a theta entry as well, so it
        # stays the same length as the extended p0.
        if len(p0) == len(full_params):
            full_params = numpy.concatenate((full_params, [theta]))
        p0 = list(p0)
        p0.append(theta)

        def func_ex(p, ns, pts):
            return p[-1] * base_func(p[:-1], ns, pts)

    # Derivatives are only needed with respect to the parameters of the
    # complex model that were pinned to specified values in the simple model.
    def diff_func(diff_params, ns, grid_pts):
        # Only the nested parameters vary; everything else is taken from p0.
        params = numpy.array(p0, copy=True, dtype=float)
        # Numpy fancy indexing overwrites just the nested positions.
        params[nested_indices] = diff_params
        return func_ex(params, ns, grid_pts)

    # Cut a full-length parameter list down to just the nested entries.
    if len(p0) == len(full_params):
        full_params = numpy.asarray(full_params)[nested_indices]
    if len(nested_indices) != len(full_params):
        raise KeyError('Full parameters not equal in length to p0 or nested '
                       'indices')

    p_nested = numpy.asarray(p0)[nested_indices]
    gim, hess, _J, _score = get_godambe(diff_func, grid_pts, all_boot,
                                        p_nested, data, eps, log=False)

    param_diff = full_params - p_nested
    diff_T = numpy.transpose(param_diff)
    wald_adj = numpy.dot(numpy.dot(diff_T, gim), param_diff)
    wald_org = numpy.dot(numpy.dot(diff_T, hess), param_diff)

    return (wald_adj, wald_org) if adj_and_org else wald_adj
def score_stat(func_ex, grid_pts, all_boot, p0, data, nested_indices,
               multinom=True, eps=0.01, adj_and_org=False):
    """
    Calculate test statistic from score test

    Returns the adjusted score statistic (quadratic form of cU with the
    inverse of J). If adj_and_org is True, returns the tuple
    (adjusted, unadjusted), where the unadjusted statistic uses the inverse
    of H instead.

    func_ex: Model function for complex model
    grid_pts: Grid points to evaluate model function
    all_boot: List of bootstrap frequency spectra
    p0: Best-fit parameters for the simple model, with nested parameter
        explicitly defined. Although equal to values for simple model, should
        be in a list form that can be taken in by the complex model you'd
        like to evaluate.
    data: Original data frequency spectrum
    nested_indices: List of positions of nested parameters in complex model
                    parameter list
    eps: Fractional stepsize to use when taking finite-difference derivatives
    multinom: If True, assume model is defined without an explicit parameter
              for theta. Because uncertainty in theta must be accounted for
              to get correct uncertainties for other parameters, this
              function will automatically consider theta if multinom=True.
    adj_and_org: If False, return only adjusted score statistic. If True,
                 also return unadjusted statistic as second return value.
    """
    if multinom:
        # Promote theta to an explicit trailing parameter of the model.
        base_func = func_ex
        unscaled = base_func(p0, data.sample_sizes, grid_pts)
        theta = Inference.optimal_sfs_scaling(unscaled, data)
        p0 = list(p0)
        p0.append(theta)

        def func_ex(p, ns, pts):
            return p[-1] * base_func(p[:-1], ns, pts)

    # Derivatives are only needed with respect to the parameters of the
    # complex model that were pinned to specified values in the simple model.
    def diff_func(diff_params, ns, grid_pts):
        # Only the nested parameters vary; everything else is taken from p0.
        params = numpy.array(p0, copy=True, dtype=float)
        # Numpy fancy indexing overwrites just the nested positions.
        params[nested_indices] = diff_params
        return func_ex(params, ns, grid_pts)

    p_nested = numpy.asarray(p0)[nested_indices]
    _gim, hess, J_mat, score = get_godambe(diff_func, grid_pts, all_boot,
                                           p_nested, data, eps, log=False)

    # The quadratic forms come back as 1x1 matrices; [0, 0] pulls out the
    # scalar.
    score_T = numpy.transpose(score)
    hess_form = numpy.dot(numpy.dot(score_T, numpy.linalg.inv(hess)), score)
    score_org = hess_form[0, 0]
    J_form = numpy.dot(numpy.dot(score_T, numpy.linalg.inv(J_mat)), score)
    score_adj = J_form[0, 0]

    return (score_adj, score_org) if adj_and_org else score_adj