Example #1
def get_godambe(func_ex, all_boot, p0, data, eps, log=True):
    # Assume the last element of p0 is theta and the remaining elements are
    # the demographic model parameters.
    # log dictates whether parameters are regular or logarithmic.
    # func_ex is the dadi extrapolated function, all_boot is the bootstrapped
    # data, and p0 are the best-fit params for the data/func_ex combination.
    # Note: ns and grid_pts are expected from the enclosing scope.
    J = numpy.zeros((len(p0), len(p0)))
    if log:
        func = lambda params: Inference.ll(numpy.exp(params[-1])*func_ex(numpy.exp(params[:-1]), ns, grid_pts), data)
        hess = -get_hess(func, numpy.log(p0), eps)
    else:
        func = lambda params: Inference.ll(params[-1]*func_ex(params[:-1], ns, grid_pts), data)
        hess = -get_hess(func, p0, eps)
    for ii, boot in enumerate(all_boot):
        boot = Spectrum(boot)
        if log:
            func = lambda params: Inference.ll(numpy.exp(params[-1])*func_ex(numpy.exp(params[:-1]), ns, grid_pts), boot)
            grad_temp = get_grad(func, numpy.log(p0), eps)
        else:
            func = lambda params: Inference.ll(params[-1]*func_ex(params[:-1], ns, grid_pts), boot)
            grad_temp = get_grad(func, p0, eps)
        J_temp = numpy.outer(grad_temp, grad_temp)
        J = J + J_temp
    J = J/len(all_boot)
    J_inv = numpy.linalg.inv(J)
    # G = H*J^-1*H
    godambe = numpy.dot(numpy.dot(hess, J_inv), hess)
    return godambe, hess
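The Godambe matrix combines the model's sensitivity (H) with the bootstrap variability of the score (J): G = H J^-1 H. A minimal usage sketch, assuming ns, grid_pts, and best-fit parameters popt are defined as in the snippet above (all names illustrative):

import numpy
godambe, hess = get_godambe(func_ex, all_boot, popt, data, eps=0.01, log=True)
# With log=True these are standard deviations of the *logs* of the parameters
uncerts = numpy.sqrt(numpy.diag(numpy.linalg.inv(godambe)))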
Example #2
def _object_func(params,
                 data,
                 model_func,
                 sel_dist,
                 theta,
                 lower_bound=None,
                 upper_bound=None,
                 verbose=0,
                 multinom=False,
                 flush_delay=0,
                 func_args=[],
                 func_kwargs={},
                 fixed_params=None,
                 ll_scale=1,
                 output_stream=sys.stdout,
                 store_thetas=False):
    """
    Objective function for optimization.
    """
    global _counter
    _counter += 1

    # Deal with fixed parameters
    params_up = Inference._project_params_up(params, fixed_params)

    # Check our parameter bounds
    if lower_bound is not None:
        for pval, bound in zip(params_up, lower_bound):
            if bound is not None and pval < bound:
                return -_out_of_bounds_val / ll_scale
    if upper_bound is not None:
        for pval, bound in zip(params_up, upper_bound):
            if bound is not None and pval > bound:
                return -_out_of_bounds_val / ll_scale

    ns = data.sample_sizes
    all_args = [params_up, ns, sel_dist, theta] + list(func_args)

    sfs = model_func(*all_args, **func_kwargs)
    if multinom:
        result = Inference.ll_multinom(sfs, data)
    else:
        result = Inference.ll(sfs, data)

    if store_thetas:
        global _theta_store
        _theta_store[tuple(params)] = Inference.optimal_sfs_scaling(sfs, data)

    # Bad result
    if numpy.isnan(result):
        result = _out_of_bounds_val

    if (verbose > 0) and (_counter % verbose == 0):
        param_str = 'array([%s])' % (', '.join(
            ['%- 12g' % v for v in params_up]))
        output_stream.write('%-8i, %-12g, %s%s' %
                            (_counter, result, param_str, os.linesep))
        Misc.delayed_flush(delay=flush_delay)

    return -result / ll_scale
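The leading underscore marks this as an internal objective: it returns the negative log-likelihood (divided by ll_scale), so minimizing it maximizes the likelihood. A hedged sketch of wiring it into a scipy minimizer (all argument values illustrative):

import scipy.optimize
args = (data, model_func, sel_dist, theta)   # positional args after params
popt = scipy.optimize.fmin(_object_func, p0, args=args, disp=False)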
Example #3
def plot_1d_comp_Poisson(model,
                         data,
                         fig_num=None,
                         residual='Anscombe',
                         plot_masked=False,
                         show=True):
    """
    Poisson comparison between 1d model and data.


    model: 1-dimensional model SFS
    data: 1-dimensional data SFS
    fig_num: Clear and use figure fig_num for display. If None, a new figure
             window is created.
    residual: 'Anscombe' for Anscombe residuals, which are more normally
              distributed for Poisson sampling. 'linear' for the linear
              residuals, which can be less biased.
    plot_masked: Additionally plots (in open circles) results for points in the
                 model or data that were masked.
    show: If True, execute pylab.show command to make sure plot displays.
    """
    if fig_num is None:
        f = pylab.gcf()
    else:
        f = pylab.figure(fig_num, figsize=(7, 7))
    pylab.clf()

    if data.folded and not model.folded:
        model = model.fold()

    masked_model, masked_data = Numerics.intersect_masks(model, data)

    ax = pylab.subplot(2, 1, 1)
    pylab.semilogy(masked_data, '-ob', label='data')
    pylab.semilogy(masked_model, '-or', label='model')

    if plot_masked:
        pylab.semilogy(masked_data.data, '--ob', mfc='w', zorder=-100)
        pylab.semilogy(masked_model.data, '--or', mfc='w', zorder=-100)

    ax.legend(loc='upper right')

    pylab.subplot(2, 1, 2, sharex=ax)
    if residual == 'Anscombe':
        resid = Inference.Anscombe_Poisson_residual(masked_model, masked_data)
    elif residual == 'linear':
        resid = Inference.linear_Poisson_residual(masked_model, masked_data)
    else:
        raise ValueError("Unknown class of residual '%s'." % residual)
    pylab.plot(resid, '-og')

    if plot_masked:
        pylab.plot(resid.data, '--og', mfc='w', zorder=-100)

    ax.set_xlim(0, data.shape[0] - 1)
    if show:
        pylab.show()
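A minimal sketch of driving this comparison from a fitted dadi model; the model and data objects are illustrative, and the model is rescaled first because this is a Poisson (not multinomial) comparison:

model = func_ex(popt, data.sample_sizes, pts_l)
scaled = Inference.optimally_scaled_sfs(model, data)
plot_1d_comp_Poisson(scaled, data, fig_num=1, residual='Anscombe')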
Example #4
def optimize(p0,
             data1,
             data2,
             cache1,
             cache2,
             model_func,
             sel_dist,
             scal_fac1,
             scal_fac2,
             theta1,
             theta2,
             lower_bound=None,
             upper_bound=None,
             verbose=0,
             flush_delay=0.5,
             epsilon=1e-3,
             gtol=1e-5,
             multinom=False,
             maxiter=None,
             full_output=False,
             func_args=[],
             func_kwargs={},
             fixed_params=None,
             ll_scale=1,
             output_file=None):

    if output_file:
        output_stream = open(output_file, 'w')
    else:
        output_stream = sys.stdout

    args = (data1, data2, cache1, cache2, model_func, sel_dist, scal_fac1,
            scal_fac2, theta1, theta2, lower_bound, upper_bound, verbose,
            multinom, flush_delay, func_args, func_kwargs, fixed_params,
            ll_scale, output_stream)

    p0 = Inference._project_params_down(p0, fixed_params)
    outputs = scipy.optimize.fmin_bfgs(_object_func,
                                       p0,
                                       epsilon=epsilon,
                                       args=args,
                                       gtol=gtol,
                                       full_output=True,
                                       disp=False,
                                       maxiter=maxiter)
    xopt, fopt, gopt, Bopt, func_calls, grad_calls, warnflag = outputs
    xopt = Inference._project_params_up(xopt, fixed_params)

    if output_file:
        output_stream.close()

    if not full_output:
        return [-fopt, xopt]
    else:
        return xopt, fopt, gopt, Bopt, func_calls, grad_calls, warnflag
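With the default full_output=False the function returns [-fopt, xopt], so the first element is the composite log-likelihood at the optimum. A hedged usage sketch (all inputs illustrative):

ll_opt, popt = optimize(p0, data1, data2, cache1, cache2, model_func, sel_dist,
                        scal_fac1, scal_fac2, theta1, theta2,
                        lower_bound=lower, upper_bound=upper, verbose=10)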
Example #5
def LRT_adjust(func_ex,
               grid_pts,
               all_boot,
               p0,
               data,
               nested_indices,
               multinom=True,
               eps=0.01):
    # XXX: Need to implement boot_theta_adjusts
    """
    First-order moment matching adjustment factor for likelihood ratio test

    func_ex: Model function for complex model
    grid_pts: Grid points at which to evaluate func_ex
    all_boot: List of bootstrap frequency spectra
    p0: Best-fit parameters for the simple model, with the nested parameters
        explicitly defined. Although equal to the values for the simple model,
        they should be in a list form that can be taken in by the complex
        model you would like to evaluate.
    data: Original data frequency spectrum
    nested_indices: List of positions of nested parameters in complex model
                    parameter list
    multinom: If True, assume model is defined without an explicit parameter for
              theta. Because uncertainty in theta must be accounted for to get
              correct uncertainties for other parameters, this function will
              automatically consider theta if multinom=True.
    eps: Fractional stepsize to use when taking finite-difference derivatives
         Note that if eps*param is < 1e-6, then the step size for that parameter
         will simply be eps, to avoid numerical issues with small parameter
         perturbations.
    """
    if multinom:
        func_multi = func_ex
        model = func_multi(p0, data.sample_sizes, grid_pts)
        theta_opt = Inference.optimal_sfs_scaling(model, data)
        p0 = list(p0) + [theta_opt]
        func_ex = lambda p, ns, pts: p[-1] * func_multi(p[:-1], ns, pts)

    # We only need to take derivatives with respect to the parameters in the
    # complex model that have been set to specified values in the simple model
    def diff_func(diff_params, ns, grid_pts):
        # diff_params argument is only the nested parameters. All the rest
        # should come from p0
        full_params = numpy.array(p0, copy=True, dtype=float)
        # Use numpy indexing to set relevant parameters
        full_params[nested_indices] = diff_params
        return func_ex(full_params, ns, grid_pts)

    p_nested = numpy.asarray(p0)[nested_indices]
    GIM, H, J, cU = get_godambe(diff_func,
                                grid_pts,
                                all_boot,
                                p_nested,
                                data,
                                eps,
                                log=False)

    adjust = len(nested_indices) / numpy.trace(
        numpy.dot(J, numpy.linalg.inv(H)))
    return adjust
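The returned factor rescales the conventional likelihood-ratio statistic so it is approximately chi-squared distributed despite linkage among sites. A sketch of the usual workflow, assuming ll_complex and ll_simple are the best-fit log-likelihoods and one parameter (index 3) is nested (illustrative values):

from scipy import stats
adj = LRT_adjust(func_ex, pts_l, all_boot, p_lrt, data, nested_indices=[3])
D_adj = adj * 2 * (ll_complex - ll_simple)
# df = number of nested parameters; boundary cases call for a mixture chi^2
p_val = stats.chi2.sf(D_adj, df=1)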
Example #6
def func(params, data, theta_adjust=1):
    key = (tuple(params), tuple(ns), tuple(grid_pts))
    if key not in cache:
        cache[key] = func_ex(params, ns, grid_pts)
    # theta_adjust deals with bootstraps that need different thetas
    fs = theta_adjust * cache[key]
    return Inference.ll(fs, data)
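This inner function closes over cache, func_ex, ns, and grid_pts from its enclosing scope. A sketch of the setup it assumes, so that repeated likelihood evaluations at the same parameters reuse a single model spectrum (all names illustrative):

cache = {}                      # model spectra keyed by (params, ns, grid_pts)
ns = data.sample_sizes
grid_pts = (40, 50, 60)
ll_data = func(popt, data)
ll_boot = [func(popt, boot, theta_adjust=adj)
           for boot, adj in zip(all_boot, boot_theta_adjusts)]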
Example #7
def FIM_uncert(func_ex,
               grid_pts,
               p0,
               data,
               log=False,
               multinom=True,
               eps=0.01):
    """
    Parameter uncertainties from Fisher Information Matrix

    Returns standard deviations of parameter values.

    func_ex: Model function
    grid_pts: Grid points at which to evaluate func_ex
    p0: Best-fit parameters for func_ex
    data: Original data frequency spectrum
    eps: Fractional stepsize to use when taking finite-difference derivatives
    log: If True, assume log-normal distribution of parameters. Returned values 
         are then the standard deviations of the *logs* of the parameter values,
         which can be interpreted as relative parameter uncertainties.
    multinom: If True, assume model is defined without an explicit parameter for
              theta. Because uncertainty in theta must be accounted for to get
              correct uncertainties for other parameters, this function will
              automatically consider theta if multinom=True. In that case, the
              final entry of the returned uncertainties will correspond to
              theta.
    """
    if multinom:
        func_multi = func_ex
        model = func_multi(p0, data.sample_sizes, grid_pts)
        theta_opt = Inference.optimal_sfs_scaling(model, data)
        p0 = list(p0) + [theta_opt]
        func_ex = lambda p, ns, pts: p[-1] * func_multi(p[:-1], ns, pts)
    H = get_godambe(func_ex, grid_pts, [], p0, data, eps, log, just_hess=True)
    return numpy.sqrt(numpy.diag(numpy.linalg.inv(H)))
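Because the FIM treats all sites as independent, these uncertainties are typically anti-conservative for linked SNP data; GIM_uncert (Example #10) accounts for linkage via bootstraps. A minimal call sketch (inputs illustrative):

uncerts = FIM_uncert(func_ex, pts_l, popt, data)
# With multinom=True (the default), the last entry is the uncertainty on theta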
Example #8
def plot_3d_comp_multinom(model, data, vmin=None, vmax=None,
                          resid_range=None, fig_num=None,
                          pop_ids=None, residual='Anscombe', adjust=True):
    """
    Multinomial comparison between 3d model and data.


    model: 3-dimensional model SFS
    data: 3-dimensional data SFS
    vmin, vmax: Minimum and maximum values plotted for sfs are vmin and
                vmax respectively.
    resid_range: Residual plot saturates at +- resid_range.
    fig_num: Clear and use figure fig_num for display. If None, a new figure
             window is created.
    pop_ids: If not None, override pop_ids stored in Spectrum.
    residual: 'Anscombe' for Anscombe residuals, which are more normally
              distributed for Poisson sampling. 'linear' for the linear
              residuals, which can be less biased.
    adjust: Should method use automatic 'subplots_adjust'? For advanced
            manipulation of plots, it may be useful to make this False.

    This comparison is multinomial in that it rescales the model to optimally
    fit the data.
    """
    model = Inference.optimally_scaled_sfs(model, data)

    plot_3d_comp_Poisson(model, data, vmin=vmin, vmax=vmax,
                         resid_range=resid_range, fig_num=fig_num,
                         pop_ids=pop_ids, residual=residual,
                         adjust=adjust)
def plot_1d_comp_multinom(model,
                          data,
                          fig_num=None,
                          residual='Anscombe',
                          plot_masked=False):
    """
    Multinomial comparison between 1d model and data.


    model: 1-dimensional model SFS
    data: 1-dimensional data SFS
    fig_num: Clear and use figure fig_num for display. If None, a new figure
             window is created.
    residual: 'Anscombe' for Anscombe residuals, which are more normally
              distributed for Poisson sampling. 'linear' for the linear
              residuals, which can be less biased.
    plot_masked: Additionally plots (in open circles) results for points in the 
                 model or data that were masked.

    This comparison is multinomial in that it rescales the model to optimally
    fit the data.
    """
    model = Inference.optimally_scaled_sfs(model, data)

    plot_1d_comp_Poisson(model, data, fig_num, residual, plot_masked)
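Since the multinomial wrapper only rescales the model before delegating, the two calls below produce the same figure (objects illustrative):

plot_1d_comp_multinom(model, data)
plot_1d_comp_Poisson(Inference.optimally_scaled_sfs(model, data),
                     data, None, 'Anscombe', False)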
Example #10
def GIM_uncert(func_ex,
               grid_pts,
               all_boot,
               p0,
               data,
               log=False,
               multinom=True,
               eps=0.01,
               return_GIM=False,
               boot_theta_adjusts=None):
    """
    Parameter uncertainties from Godambe Information Matrix (GIM)

    Returns standard deviations of parameter values.

    func_ex: Model function
    all_boot: List of bootstrap frequency spectra
    p0: Best-fit parameters for func_ex
    data: Original data frequency spectrum
    eps: Fractional stepsize to use when taking finite-difference derivatives.
         Note that if eps*param is < 1e-6, then the step size for that parameter
         will simply be eps, to avoid numerical issues with small parameter
         perturbations.
    log: If True, assume log-normal distribution of parameters. Returned values
         are then the standard deviations of the *logs* of the parameter values,
         which can be interpreted as relative parameter uncertainties.
    multinom: If True, assume model is defined without an explicit parameter for
              theta. Because uncertainty in theta must be accounted for to get
              correct uncertainties for other parameters, this function will
              automatically consider theta if multinom=True. In that case, the
              final entry of the returned uncertainties will correspond to
              theta.
    return_GIM: If True, also return the full GIM.
    boot_theta_adjusts: Optionally, a sequence of *relative* values of theta
                        (compared to original data) to assume for bootstrap
                        data sets. Only valid when multinom=False.
    """
    if multinom:
        if boot_theta_adjusts:
            raise ValueError('boot_thetas option can only be used with '
                             'multinom=False')
        func_multi = func_ex
        model = func_multi(p0, data.sample_sizes, grid_pts)
        theta_opt = Inference.optimal_sfs_scaling(model, data)
        p0 = list(p0) + [theta_opt]
        func_ex = lambda p, ns, pts: p[-1] * func_multi(p[:-1], ns, pts)
    GIM, H, J, cU = get_godambe(func_ex,
                                grid_pts,
                                all_boot,
                                p0,
                                data,
                                eps,
                                log,
                                boot_theta_adjusts=boot_theta_adjusts)
    uncerts = numpy.sqrt(numpy.diag(numpy.linalg.inv(GIM)))
    if not return_GIM:
        return uncerts
    else:
        return uncerts, GIM
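A sketch of the typical call, assuming all_boot holds bootstrap Spectrum objects (e.g., resampled by chromosome) and popt are the best-fit parameters (names illustrative):

import numpy
uncerts = GIM_uncert(func_ex, pts_l, all_boot, popt, data, log=True)
# log=True: std. devs of log-parameters, roughly relative uncertainties;
# the final entry is for theta when multinom=True
lower = numpy.asarray(popt) * numpy.exp(-1.96 * uncerts[:-1])
upper = numpy.asarray(popt) * numpy.exp(+1.96 * uncerts[:-1])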
Example #11
def optimize(p0,
             data,
             model_func,
             sel_dist,
             theta,
             lower_bound=None,
             upper_bound=None,
             verbose=0,
             flush_delay=0.5,
             epsilon=1e-3,
             gtol=1e-5,
             multinom=False,
             maxiter=None,
             full_output=False,
             func_args=[],
             func_kwargs={},
             fixed_params=None,
             ll_scale=1,
             output_file=None):
    """
    optimizer for use with distributions where log transformations do not work,
    e.g. when gamma is positive and negative
    """
    if output_file:
        output_stream = open(output_file, 'w')
    else:
        output_stream = sys.stdout

    args = (data, model_func, sel_dist, theta, lower_bound, upper_bound,
            verbose, multinom, flush_delay, func_args, func_kwargs,
            fixed_params, ll_scale, output_stream)

    p0 = Inference._project_params_down(p0, fixed_params)
    outputs = scipy.optimize.fmin_bfgs(_object_func,
                                       p0,
                                       epsilon=epsilon,
                                       args=args,
                                       gtol=gtol,
                                       full_output=True,
                                       disp=False,
                                       maxiter=maxiter)
    xopt, fopt, gopt, Bopt, func_calls, grad_calls, warnflag = outputs
    xopt = Inference._project_params_up(xopt, fixed_params)

    if output_file:
        output_stream.close()

    if not full_output:
        return xopt
    else:
        return xopt, fopt, gopt, Bopt, func_calls, grad_calls, warnflag
def plot_1d_comp_Poisson(model, data, fig_num=None, residual='Anscombe',
                         plot_masked=False):
    """
    Poisson comparison between 1d model and data.


    model: 1-dimensional model SFS
    data: 1-dimensional data SFS
    fig_num: Clear and use figure fig_num for display. If None, a new figure
             window is created.
    residual: 'Anscombe' for Anscombe residuals, which are more normally
              distributed for Poisson sampling. 'linear' for the linear
              residuals, which can be less biased.
    plot_masked: Additionally plots (in open circles) results for points in the 
                 model or data that were masked.
    """
    if fig_num is None:
        f = pylab.gcf()
    else:
        f = pylab.figure(fig_num, figsize=(7,7))
    pylab.clf()

    if data.folded and not model.folded:
        model = model.fold()

    masked_model, masked_data = Numerics.intersect_masks(model, data)

    ax = pylab.subplot(2,1,1)
    pylab.semilogy(masked_data, '-ob')
    pylab.semilogy(masked_model, '-or')

    if plot_masked:
        pylab.semilogy(masked_data.data, '--ob', mfc='w', zorder=-100)
        pylab.semilogy(masked_model.data, '--or', mfc='w', zorder=-100)

    pylab.subplot(2,1,2, sharex=ax)
    if residual == 'Anscombe':
        resid = Inference.Anscombe_Poisson_residual(masked_model, masked_data)
    elif residual == 'linear':
        resid = Inference.linear_Poisson_residual(masked_model, masked_data)
    else:
        raise ValueError("Unknown class of residual '%s'." % residual)
    pylab.plot(resid, '-og')
    if plot_masked:
        pylab.plot(resid.data, '--og', mfc='w', zorder=-100)

    ax.set_xlim(0, data.shape[0]-1)
    pylab.show()
def plot_1d_comp_multinom(model, data, fig_num=None, residual='Anscombe',
                          plot_masked=False):
    """
    Multinomial comparison between 1d model and data.


    model: 1-dimensional model SFS
    data: 1-dimensional data SFS
    fig_num: Clear and use figure fig_num for display. If None, a new figure
             window is created.
    residual: 'Anscombe' for Anscombe residuals, which are more normally
              distributed for Poisson sampling. 'linear' for the linear
              residuals, which can be less biased.
    plot_masked: Additionally plots (in open circles) results for points in the 
                 model or data that were masked.

    This comparison is multinomial in that it rescales the model to optimally
    fit the data.
    """
    model = Inference.optimally_scaled_sfs(model, data)

    plot_1d_comp_Poisson(model, data, fig_num, residual,
                         plot_masked)
Example #14
def plot_2d_comp_Poisson(model, data, vmin=None, vmax=None,
                         resid_range=None, fig_num=None,
                         pop_ids=None, residual='Anscombe',
                         adjust=True, saveplot=False,
                         nomplot="plot_2d_comp_Poisson",
                         showplot=True):
    """
    Poisson comparison between 2d model and data.


    model: 2-dimensional model SFS
    data: 2-dimensional data SFS
    vmin, vmax: Minimum and maximum values plotted for sfs are vmin and
                vmax respectively.
    resid_range: Residual plot saturates at +- resid_range.
    fig_num: Clear and use figure fig_num for display. If None, a new figure
             window is created.
    pop_ids: If not None, override pop_ids stored in Spectrum.
    residual: 'Anscombe' for Anscombe residuals, which are more normally
              distributed for Poisson sampling. 'linear' for the linear
              residuals, which can be less biased.
    adjust: Should method use automatic 'subplots_adjust'? For advanced
            manipulation of plots, it may be useful to make this False.
    saveplot: If True, save the figure to <nomplot>.png.
    nomplot: Base filename used when saveplot is True.
    showplot: If True, execute pylab.show to display the figure.
    """
    if data.folded and not model.folded:
        model = model.fold()

    masked_model, masked_data = Numerics.intersect_masks(model, data)

    if fig_num is None:
        f = pylab.gcf()
    else:
        f = pylab.figure(fig_num, figsize=(7,7))

    pylab.clf()
    if adjust:
        pylab.subplots_adjust(bottom=0.07, left=0.07, top=0.94, right=0.95, 
                              hspace=0.26, wspace=0.26)

    max_toplot = max(masked_model.max(), masked_data.max())
    min_toplot = min(masked_model.min(), masked_data.min())
    if vmax is None:
        vmax = max_toplot
    if vmin is None:
        vmin = min_toplot
    extend = _extend_mapping[vmin <= min_toplot, vmax >= max_toplot]

    if pop_ids is not None:
        data_pop_ids = model_pop_ids = resid_pop_ids = pop_ids
        if len(pop_ids) != 2:
            raise ValueError('pop_ids must be of length 2.')
    else:
        data_pop_ids = masked_data.pop_ids
        model_pop_ids = masked_model.pop_ids
        if masked_model.pop_ids is None:
            model_pop_ids = data_pop_ids

        if model_pop_ids == data_pop_ids:
            resid_pop_ids = model_pop_ids
        else:
            resid_pop_ids = None

    ax = pylab.subplot(2,2,1)
    plot_single_2d_sfs(masked_data, vmin=vmin, vmax=vmax,
                       pop_ids=data_pop_ids, colorbar=False)
    ax.set_title('data')

    ax2 = pylab.subplot(2,2,2, sharex=ax, sharey=ax)
    plot_single_2d_sfs(masked_model, vmin=vmin, vmax=vmax,
                       pop_ids=model_pop_ids, extend=extend)
    ax2.set_title('model')

    if residual == 'Anscombe':
        resid = Inference.Anscombe_Poisson_residual(masked_model, masked_data,
                                              mask=vmin)
    elif residual == 'linear':
        resid = Inference.linear_Poisson_residual(masked_model, masked_data,
                                            mask=vmin)
    else:
        raise ValueError("Unknown class of residual '%s'." % residual)

    if resid_range is None:
        resid_range = max((abs(resid.max()), abs(resid.min())))
    resid_extend = _extend_mapping[-resid_range <= resid.min(), 
                                   resid_range >= resid.max()]

    ax3 = pylab.subplot(2,2,3, sharex=ax, sharey=ax)
    plot_2d_resid(resid, resid_range, pop_ids=resid_pop_ids,
                  extend=resid_extend)
    ax3.set_title('residuals')

    ax = pylab.subplot(2,2,4)
    flatresid = numpy.compress(numpy.logical_not(resid.mask.ravel()), 
                               resid.ravel())
    ax.hist(flatresid, bins=20, density=True)
    ax.set_title('residuals')
    ax.set_yticks([])
    if saveplot:
        nomplot = nomplot + ".png"
        pylab.savefig(nomplot)
    if showplot:
        pylab.show()
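A minimal sketch of a 2D comparison, rescaling the model first so the Poisson comparison is on the data's scale (all identifiers illustrative):

model = func_ex(popt, data.sample_sizes, pts_l)
scaled = Inference.optimally_scaled_sfs(model, data)
plot_2d_comp_Poisson(scaled, data, resid_range=3,
                     pop_ids=('pop1', 'pop2'),
                     saveplot=True, nomplot='model_vs_data')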
Example #15
def score_stat(func_ex,
               grid_pts,
               all_boot,
               p0,
               data,
               nested_indices,
               multinom=True,
               eps=0.01,
               adj_and_org=False):
    """
    Calculate the test statistic from the score test

    func_ex: Model function for complex model
    grid_pts: Grid points to evaluate model function
    all_boot: List of bootstrap frequency spectra
    p0: Best-fit parameters for the simple model, with the nested parameters
        explicitly defined. Although equal to the values for the simple model,
        they should be in a list form that can be taken in by the complex
        model you would like to evaluate.
    data: Original data frequency spectrum
    nested_indices: List of positions of nested parameters in complex model
                    parameter list
    eps: Fractional stepsize to use when taking finite-difference derivatives
    multinom: If True, assume model is defined without an explicit parameter for
              theta. Because uncertainty in theta must be accounted for to get
              correct uncertainties for other parameters, this function will
              automatically consider theta if multinom=True.
    adj_and_org: If False, return only adjusted score statistic. If True, also
                 return unadjusted statistic as second return value.
    """
    if multinom:
        func_multi = func_ex
        model = func_multi(p0, data.sample_sizes, grid_pts)
        theta_opt = Inference.optimal_sfs_scaling(model, data)
        p0 = list(p0) + [theta_opt]
        func_ex = lambda p, ns, pts: p[-1] * func_multi(p[:-1], ns, pts)

    # We only need to take derivatives with respect to the parameters in the
    # complex model that have been set to specified values in the simple model
    def diff_func(diff_params, ns, grid_pts):
        # diff_params argument is only the nested parameters. All the rest
        # should come from p0
        full_params = numpy.array(p0, copy=True, dtype=float)
        # Use numpy indexing to set relevant parameters
        full_params[nested_indices] = diff_params
        return func_ex(full_params, ns, grid_pts)

    p_nested = numpy.asarray(p0)[nested_indices]
    GIM, H, J, cU = get_godambe(diff_func,
                                grid_pts,
                                all_boot,
                                p_nested,
                                data,
                                eps,
                                log=False)

    score_org = numpy.dot(numpy.dot(numpy.transpose(cU), numpy.linalg.inv(H)),
                          cU)[0, 0]
    score_adj = numpy.dot(numpy.dot(numpy.transpose(cU), numpy.linalg.inv(J)),
                          cU)[0, 0]

    if adj_and_org:
        return score_adj, score_org
    return score_adj
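The adjusted statistic is compared to a chi-squared distribution with one degree of freedom per nested parameter. A usage sketch, assuming p_null holds the simple model's best fit with the nested parameters at their null values (inputs illustrative):

from scipy import stats
score_adj = score_stat(func_ex, pts_l, all_boot, p_null, data,
                       nested_indices=[3])
p_val = stats.chi2.sf(score_adj, df=1)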
Example #16
            # (fragment: tail of the get_godambe loop shown in Example #1)
            grad_temp = get_grad(func, numpy.log(p0), eps)
        J_temp = numpy.outer(grad_temp, grad_temp)
        J = J + J_temp
    J = J/len(all_boot)
    J_inv = numpy.linalg.inv(J)
    # G = H*J^-1*H
    godambe = numpy.dot(numpy.dot(hess, J_inv), hess)
    return godambe, hess

def uncert(func_ex, all_boot, p0, data, eps, log=True):
    godambe, hess = get_godambe(func_ex, all_boot, p0, data, eps, log)
    return numpy.sqrt(numpy.diag(numpy.linalg.inv(godambe)))

def LRT(func_ex, all_boot, p0, data, eps, diff=1):
    # p0 holds the best-fit parameters of the simple model, with the complex
    # model's extra parameter(s) occupying the first `diff` entries of p0.
    func = lambda param: Inference.ll_multinom(func_ex(list(param[:diff]) + list(p0[diff:]), ns, grid_pts), data)
    H = -get_hess_log(func, p0[:diff], eps)
    J_boot = numpy.zeros([diff, diff])
    J_array = []
    for i in range(0, len(all_boot)):
        boot = Spectrum(all_boot[i])
        func = lambda param: Inference.ll_multinom(func_ex(list(param[:diff]) + list(p0[diff:]), ns, grid_pts), boot)
        cU_theta = get_grad_log(func, p0[:diff], eps)
        J_theta = numpy.outer(cU_theta, cU_theta)
        J_boot = J_boot + J_theta
        J_array.append(J_theta)
    J = J_boot/len(all_boot)
    adjust = diff/numpy.trace(numpy.dot(J, numpy.linalg.inv(H)))
    return adjust
Example #17
# (fragment: tail of a dadi optimization call)
    maxiter=50)

print('Optimized parameters', str(repr(popt)))

model = func_ex(popt[0], ns, pts_l)
ll_opt = dadi.Inference.ll_multinom(model, pop)

print('Optimized log-likelihood:', str(ll_opt))
print('Theta0_2:', str(theta0))

##Print out the scaled SFS and the Anscombe Poisson residuals
print("==============================================")

rescaled = Inference.optimally_scaled_sfs(model, pop)
print(rescaled)
print(Inference.Anscombe_Poisson_residual(rescaled, pop))
dadi.Plotting.plot_1d_comp_multinom(model, pop)

print("==============================================")

####Neutral Fit#########
###########################

print("NEUTRAL MODEL")

params = [1]
Example #18
def optimize_cons(p0,
                  data,
                  model_func,
                  sel_dist,
                  theta,
                  lower_bound=None,
                  upper_bound=None,
                  verbose=0,
                  flush_delay=0.5,
                  epsilon=1e-4,
                  constraint=None,
                  gtol=1e-6,
                  multinom=False,
                  maxiter=None,
                  full_output=False,
                  func_args=[],
                  func_kwargs={},
                  fixed_params=None,
                  ll_scale=1,
                  output_file=None):
    """
    Constrained optimization needs a constraint function and bounds.
    """

    if output_file:
        output_stream = open(output_file, 'w')
    else:
        output_stream = sys.stdout

    # NOTE: the *_a bounds below extend the bounds for an extra parameter but
    # are not currently passed to the optimizer.
    if lower_bound is not None:
        lower_bound_a = lower_bound + [0]
    if upper_bound is not None:
        upper_bound_a = upper_bound + [numpy.inf]

    args = (data, model_func, sel_dist, theta, lower_bound, upper_bound,
            verbose, multinom, flush_delay, func_args, func_kwargs,
            fixed_params, ll_scale, output_stream)

    p0 = Inference._project_params_down(p0, fixed_params)

    #### make sure to define a constraint function and bounds ####
    bnds = ()
    if (lower_bound is not None) and (upper_bound is not None):
        bnds = tuple((x, y) for x, y in zip(lower_bound, upper_bound))
    outputs = scipy.optimize.fmin_slsqp(_object_func,
                                        p0,
                                        bounds=bnds,
                                        args=args,
                                        f_eqcons=constraint,
                                        epsilon=epsilon,
                                        iter=maxiter,
                                        full_output=True,
                                        disp=False)
    xopt, fopt, func_calls, grad_calls, warnflag = outputs
    xopt = Inference._project_params_up(xopt, fixed_params)

    if output_file:
        output_stream.close()

    if not full_output:
        return [-fopt, xopt]
    else:
        return xopt, fopt, func_calls, grad_calls, warnflag
Example #19
def _object_func(params,
                 data1,
                 data2,
                 cache1,
                 cache2,
                 model_func,
                 sel_dist,
                 scal_fac1,
                 scal_fac2,
                 theta1,
                 theta2,
                 lower_bound=None,
                 upper_bound=None,
                 verbose=0,
                 multinom=False,
                 flush_delay=0,
                 func_args=[],
                 func_kwargs={},
                 fixed_params1=None,
                 fixed_params2=None,
                 ll_scale=1,
                 output_stream=sys.stdout,
                 store_thetas=False):
    """
    Objective function for optimization.
    """
    global _counter
    _counter += 1

    # The scaling factors scale sel_dist differently for species 1 and species 2

    sel_dist1 = copy_func(
        sel_dist, defaults=scal_fac1)  # scal_fac1 should be 2*Nea of pop 1
    sel_dist2 = copy_func(
        sel_dist, defaults=scal_fac2)  # scal_fac2 should be 4*Nea of pop 2

    # Deal with fixed parameters
    params_up1 = Inference._project_params_up(params, fixed_params1)
    params_up2 = Inference._project_params_up(params, fixed_params2)

    # Check our parameter bounds
    if lower_bound is not None:
        for pval, bound in zip(params_up1, lower_bound):
            if bound is not None and pval < bound:
                return -_out_of_bounds_val / ll_scale
    if upper_bound is not None:
        for pval, bound in zip(params_up1, upper_bound):
            if bound is not None and pval > bound:
                return -_out_of_bounds_val / ll_scale

    ns1 = data1.sample_sizes
    ns2 = data2.sample_sizes
    all_args1 = [params_up1, ns1, sel_dist1, theta1, cache1] + list(func_args)
    all_args2 = [params_up2, ns2, sel_dist2, theta2, cache2] + list(func_args)
    # Pass the pts argument via keyword, but don't alter the passed-in
    # func_kwargs
    #func_kwargs = func_kwargs.copy()
    #func_kwargs['pts'] = pts
    sfs1 = model_func(*all_args1, **func_kwargs)
    sfs2 = model_func(*all_args2, **func_kwargs)
    if multinom:
        result = Inference.ll_multinom(sfs1, data1) + Inference.ll_multinom(
            sfs2, data2)
    else:
        result = Inference.ll(sfs1, data1) + Inference.ll(sfs2, data2)

    # Bad result
    if numpy.isnan(result):
        result = _out_of_bounds_val

    if (verbose > 0) and (_counter % verbose == 0):
        param_str = 'array([%s])' % (', '.join(
            ['%- 12g' % v for v in params_up1]))
        output_stream.write('%-8i, %-12g, %s%s' %
                            (_counter, result, param_str, os.linesep))
        Misc.delayed_flush(delay=flush_delay)

    return -result / ll_scale
Example #20
def func(params, data):
    key = (tuple(params), tuple(ns), tuple(grid_pts))
    if key not in cache:
        cache[key] = func_ex(params, ns, grid_pts)
    fs = cache[key]
    return Inference.ll(fs, data)
Example #21
def Wald_stat(func_ex,
              grid_pts,
              all_boot,
              p0,
              data,
              nested_indices,
              full_params,
              multinom=True,
              eps=0.01,
              adj_and_org=False):
    # XXX: Implement boot_theta_adjusts
    """
    Calculate the test statistic from the Wald test

    func_ex: Model function for complex model
    all_boot: List of bootstrap frequency spectra
    p0: Best-fit parameters for the simple model, with the nested parameters
        explicitly defined. Although equal to the values for the simple model,
        they should be in a list form that can be taken in by the complex
        model you would like to evaluate.
    data: Original data frequency spectrum
    nested_indices: List of positions of nested parameters in complex model
                    parameter list
    full_params: Parameter values for parameters found only in the complex
                 model. Can be either an array with just the values found only
                 in the complex model, or the entire parameter list of the
                 complex model.
    multinom: If True, assume model is defined without an explicit parameter for
              theta. Because uncertainty in theta must be accounted for to get
              correct uncertainties for other parameters, this function will
              automatically consider theta if multinom=True. In that case, the
              final entry of the returned uncertainties will correspond to
              theta.
    eps: Fractional stepsize to use when taking finite-difference derivatives
         Note that if eps*param is < 1e-6, then the step size for that parameter
         will simply be eps, to avoid numerical issues with small parameter
         perturbations.
    adj_and_org: If False, return only adjusted Wald statistic. If True, also
                 return unadjusted statistic as second return value.
    """
    if multinom:
        func_multi = func_ex
        model = func_multi(p0, data.sample_sizes, grid_pts)
        theta_opt = Inference.optimal_sfs_scaling(model, data)
        # Also need to extend full_params
        if len(full_params) == len(p0):
            full_params = numpy.concatenate((full_params, [theta_opt]))
        p0 = list(p0) + [theta_opt]
        func_ex = lambda p, ns, pts: p[-1] * func_multi(p[:-1], ns, pts)

    # We only need to take derivatives with respect to the parameters in the
    # complex model that have been set to specified values in the simple model
    def diff_func(diff_params, ns, grid_pts):
        # diff_params argument is only the nested parameters. All the rest
        # should come from p0
        full_params = numpy.array(p0, copy=True, dtype=float)
        # Use numpy indexing to set relevant parameters
        full_params[nested_indices] = diff_params
        return func_ex(full_params, ns, grid_pts)

    # Reduce full params list to be same length as nested indices
    if len(full_params) == len(p0):
        full_params = numpy.asarray(full_params)[nested_indices]
    if len(full_params) != len(nested_indices):
        raise KeyError('Full parameters not equal in length to p0 or nested '
                       'indices')

    p_nested = numpy.asarray(p0)[nested_indices]
    GIM, H, J, cU = get_godambe(diff_func,
                                grid_pts,
                                all_boot,
                                p_nested,
                                data,
                                eps,
                                log=False)
    param_diff = full_params - p_nested

    wald_adj = numpy.dot(numpy.dot(numpy.transpose(param_diff), GIM),
                         param_diff)
    wald_org = numpy.dot(numpy.dot(numpy.transpose(param_diff), H), param_diff)

    if adj_and_org:
        return wald_adj, wald_org
    return wald_adj
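As with the score test, the adjusted Wald statistic is referred to a chi-squared distribution with one degree of freedom per nested parameter. A hedged usage sketch (inputs illustrative):

from scipy import stats
wald_adj = Wald_stat(func_ex, pts_l, all_boot, p_null, data,
                     nested_indices=[3], full_params=popt_complex)
p_val = stats.chi2.sf(wald_adj, df=1)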