Beispiel #1
0
def FIM_uncert(func_ex,
               grid_pts,
               p0,
               data,
               log=False,
               multinom=True,
               eps=0.01):
    """
    Parameter uncertainties from Fisher Information Matrix

    Returns standard deviations of parameter values.

    func_ex: Model function
    all_boot: List of bootstrap frequency spectra
    p0: Best-fit parameters for func_ex
    data: Original data frequency spectrum
    eps: Fractional stepsize to use when taking finite-difference derivatives
    log: If True, assume log-normal distribution of parameters. Returned values 
         are then the standard deviations of the *logs* of the parameter values,
         which can be interpreted as relative parameter uncertainties.
    multinom: If True, assume model is defined without an explicit parameter for
              theta. Because uncertainty in theta must be accounted for to get
              correct uncertainties for other parameters, this function will
              automatically consider theta if multinom=True. In that case, the
              final entry of the returned uncertainties will correspond to
              theta.
    """
    if multinom:
        func_multi = func_ex
        model = func_multi(p0, data.sample_sizes, grid_pts)
        theta_opt = Inference.optimal_sfs_scaling(model, data)
        p0 = list(p0) + [theta_opt]
        func_ex = lambda p, ns, pts: p[-1] * func_multi(p[:-1], ns, pts)
    H = get_godambe(func_ex, grid_pts, [], p0, data, eps, log, just_hess=True)
    return numpy.sqrt(numpy.diag(numpy.linalg.inv(H)))
Beispiel #2
0
def LRT_adjust(func_ex,
               grid_pts,
               all_boot,
               p0,
               data,
               nested_indices,
               multinom=True,
               eps=0.01):
    # XXX: Need to implement boot_theta_adjusts
    """
    First-order moment matching adjustment factor for likelihood ratio test

    func_ex: Model function for complex model
    grid_pts: Grid points at which to evaluate func_ex
    all_boot: List of bootstrap frequency spectra
    p0: Best-fit parameters for the simple model, with nested parameter
        explicity defined.  Although equal to values for simple model, should
        be in a list form that can be taken in by the complex model you'd like
        to evaluate.
    data: Original data frequency spectrum
    nested_indices: List of positions of nested parameters in complex model
                    parameter list
    multinom: If True, assume model is defined without an explicit parameter for
              theta. Because uncertainty in theta must be accounted for to get
              correct uncertainties for other parameters, this function will
              automatically consider theta if multinom=True.
    eps: Fractional stepsize to use when taking finite-difference derivatives
         Note that if eps*param is < 1e-6, then the step size for that parameter
         will simply be eps, to avoid numerical issues with small parameter
         perturbations.
    """
    if multinom:
        func_multi = func_ex
        model = func_multi(p0, data.sample_sizes, grid_pts)
        theta_opt = Inference.optimal_sfs_scaling(model, data)
        p0 = list(p0) + [theta_opt]
        func_ex = lambda p, ns, pts: p[-1] * func_multi(p[:-1], ns, pts)

    # We only need to take derivatives with respect to the parameters in the
    # complex model that have been set to specified values in the simple model
    def diff_func(diff_params, ns, grid_pts):
        # diff_params argument is only the nested parameters. All the rest
        # should come from p0
        full_params = numpy.array(p0, copy=True, dtype=float)
        # Use numpy indexing to set relevant parameters
        full_params[nested_indices] = diff_params
        return func_ex(full_params, ns, grid_pts)

    p_nested = numpy.asarray(p0)[nested_indices]
    GIM, H, J, cU = get_godambe(diff_func,
                                grid_pts,
                                all_boot,
                                p_nested,
                                data,
                                eps,
                                log=False)

    adjust = len(nested_indices) / numpy.trace(
        numpy.dot(J, numpy.linalg.inv(H)))
    return adjust
Beispiel #3
0
def GIM_uncert(func_ex,
               grid_pts,
               all_boot,
               p0,
               data,
               log=False,
               multinom=True,
               eps=0.01,
               return_GIM=False,
               boot_theta_adjusts=None):
    """
    Parameter uncertainties from Godambe Information Matrix (GIM)

    Returns standard deviations of parameter values.

    func_ex: Model function
    all_boot: List of bootstrap frequency spectra
    p0: Best-fit parameters for func_ex
    data: Original data frequency spectrum
    eps: Fractional stepsize to use when taking finite-difference derivatives.
         Note that if eps*param is < 1e-6, then the step size for that parameter
         will simply be eps, to avoid numerical issues with small parameter
         perturbations.
    log: If True, assume log-normal distribution of parameters. Returned values
         are then the standard deviations of the *logs* of the parameter values,
         which can be interpreted as relative parameter uncertainties.
    multinom: If True, assume model is defined without an explicit parameter for
              theta. Because uncertainty in theta must be accounted for to get
              correct uncertainties for other parameters, this function will
              automatically consider theta if multinom=True. In that case, the
              final entry of the returned uncertainties will correspond to
              theta.
    return_GIM: If true, also return the full GIM.
    boot_theta_adjusts: Optionally, a sequence of *relative* values of theta
                        (compared to original data) to assume for bootstrap
                        data sets. Only valid when multinom=False.
    """
    if multinom:
        if boot_theta_adjusts:
            raise ValueError('boot_thetas option can only be used with '
                             'multinom=False')
        func_multi = func_ex
        model = func_multi(p0, data.sample_sizes, grid_pts)
        theta_opt = Inference.optimal_sfs_scaling(model, data)
        p0 = list(p0) + [theta_opt]
        func_ex = lambda p, ns, pts: p[-1] * func_multi(p[:-1], ns, pts)
    GIM, H, J, cU = get_godambe(func_ex,
                                grid_pts,
                                all_boot,
                                p0,
                                data,
                                eps,
                                log,
                                boot_theta_adjusts=boot_theta_adjusts)
    uncerts = numpy.sqrt(numpy.diag(numpy.linalg.inv(GIM)))
    if not return_GIM:
        return uncerts
    else:
        return uncerts, GIM
Beispiel #4
0
def Wald_stat(func_ex,
              grid_pts,
              all_boot,
              p0,
              data,
              nested_indices,
              full_params,
              multinom=True,
              eps=0.01,
              adj_and_org=False):
    # XXX: Implement boot_theta_adjusts
    """
    Calculate test stastic from wald test
             
    func_ex: Model function for complex model
    all_boot: List of bootstrap frequency spectra
    p0: Best-fit parameters for the simple model, with nested parameter
        explicity defined.  Although equal to values for simple model, should
        be in a list form that can be taken in by the complex model you'd like
        to evaluate.
    data: Original data frequency spectrum
    nested_indices: List of positions of nested parameters in complex model
    parameter list
    full_params: Parameter values for parameters found only in complex model,
                 Can either be array with just values found only in the compelx
                 model, or entire list of parameters from complex model.
    multinom: If True, assume model is defined without an explicit parameter for
              theta. Because uncertainty in theta must be accounted for to get
              correct uncertainties for other parameters, this function will
              automatically consider theta if multinom=True. In that case, the
              final entry of the returned uncertainties will correspond to
              theta.
    eps: Fractional stepsize to use when taking finite-difference derivatives
         Note that if eps*param is < 1e-6, then the step size for that parameter
         will simply be eps, to avoid numerical issues with small parameter
         perturbations.
    adj_and_org: If False, return only adjusted Wald statistic. If True, also
                 return unadjusted statistic as second return value.
    """
    if multinom:
        func_multi = func_ex
        model = func_multi(p0, data.sample_sizes, grid_pts)
        theta_opt = Inference.optimal_sfs_scaling(model, data)
        # Also need to extend full_params
        if len(full_params) == len(p0):
            full_params = numpy.concatenate((full_params, [theta_opt]))
        p0 = list(p0) + [theta_opt]
        func_ex = lambda p, ns, pts: p[-1] * func_multi(p[:-1], ns, pts)

    # We only need to take derivatives with respect to the parameters in the
    # complex model that have been set to specified values in the simple model
    def diff_func(diff_params, ns, grid_pts):
        # diff_params argument is only the nested parameters. All the rest
        # should come from p0
        full_params = numpy.array(p0, copy=True, dtype=float)
        # Use numpy indexing to set relevant parameters
        full_params[nested_indices] = diff_params
        return func_ex(full_params, ns, grid_pts)

    # Reduce full params list to be same length as nested indices
    if len(full_params) == len(p0):
        full_params = numpy.asarray(full_params)[nested_indices]
    if len(full_params) != len(nested_indices):
        raise KeyError('Full parameters not equal in length to p0 or nested '
                       'indices')

    p_nested = numpy.asarray(p0)[nested_indices]
    GIM, H, J, cU = get_godambe(diff_func,
                                grid_pts,
                                all_boot,
                                p_nested,
                                data,
                                eps,
                                log=False)
    param_diff = full_params - p_nested

    wald_adj = numpy.dot(numpy.dot(numpy.transpose(param_diff), GIM),
                         param_diff)
    wald_org = numpy.dot(numpy.dot(numpy.transpose(param_diff), H), param_diff)

    if adj_and_org:
        return wald_adj, wald_org
    return wald_adj
Beispiel #5
0
def score_stat(func_ex,
               grid_pts,
               all_boot,
               p0,
               data,
               nested_indices,
               multinom=True,
               eps=0.01,
               adj_and_org=False):
    """
    Calculate test stastic from score test
        
    func_ex: Model function for complex model
    grid_pts: Grid points to evaluate model function
    all_boot: List of bootstrap frequency spectra
    p0: Best-fit parameters for the simple model, with nested parameter
        explicity defined.  Although equal to values for simple model, should
        be in a list form that can be taken in by the complex model you'd like
        to evaluate.
    data: Original data frequency spectrum
    nested_indices: List of positions of nested parameters in complex model
                    parameter list
    eps: Fractional stepsize to use when taking finite-difference derivatives
    multinom: If True, assume model is defined without an explicit parameter for
              theta. Because uncertainty in theta must be accounted for to get
              correct uncertainties for other parameters, this function will
              automatically consider theta if multinom=True.
    adj_and_org: If False, return only adjusted score statistic. If True, also
                 return unadjusted statistic as second return value.
    """
    if multinom:
        func_multi = func_ex
        model = func_multi(p0, data.sample_sizes, grid_pts)
        theta_opt = Inference.optimal_sfs_scaling(model, data)
        p0 = list(p0) + [theta_opt]
        func_ex = lambda p, ns, pts: p[-1] * func_multi(p[:-1], ns, pts)

    # We only need to take derivatives with respect to the parameters in the
    # complex model that have been set to specified values in the simple model
    def diff_func(diff_params, ns, grid_pts):
        # diff_params argument is only the nested parameters. All the rest
        # should come from p0
        full_params = numpy.array(p0, copy=True, dtype=float)
        # Use numpy indexing to set relevant parameters
        full_params[nested_indices] = diff_params
        return func_ex(full_params, ns, grid_pts)

    p_nested = numpy.asarray(p0)[nested_indices]
    GIM, H, J, cU = get_godambe(diff_func,
                                grid_pts,
                                all_boot,
                                p_nested,
                                data,
                                eps,
                                log=False)

    score_org = numpy.dot(numpy.dot(numpy.transpose(cU), numpy.linalg.inv(H)),
                          cU)[0, 0]
    score_adj = numpy.dot(numpy.dot(numpy.transpose(cU), numpy.linalg.inv(J)),
                          cU)[0, 0]

    if adj_and_org:
        return score_adj, score_org
    return score_adj