Exemple #1
0
def plot_1d_comp_Poisson(model,
                         data,
                         fig_num=None,
                         residual='Anscombe',
                         plot_masked=False,
                         show=True):
    """
    Poisson comparison between 1d model and data.

    The top panel shows data (blue) and model (red) on a log scale, the
    bottom panel (sharing the x axis) shows the residuals (green). If data is
    folded and model is not, model is folded before the comparison.

    model: 1-dimensional model SFS
    data: 1-dimensional data SFS
    fig_num: Clear and use figure fig_num for display. If None, the current
             figure is cleared and used.
    residual: 'Anscombe' for Anscombe residuals, which are more normally
              distributed for Poisson sampling. 'linear' for the linear
              residuals, which can be less biased.
    plot_masked: Additionally plots (in open circles) results for points in the
                 model or data that were masked.
    show: If True, execute pylab.show command to make sure plot displays.

    Raises ValueError if residual is neither 'Anscombe' nor 'linear'. The
    check is made before any figure is cleared or drawn on.
    """
    # Fail fast: reject a bad option before clobbering the caller's figure.
    if residual not in ('Anscombe', 'linear'):
        raise ValueError("Unknown class of residual '%s'." % residual)

    # With fig_num None, pylab.clf() below acts on the current figure.
    if fig_num is not None:
        pylab.figure(fig_num, figsize=(7, 7))
    pylab.clf()

    if data.folded and not model.folded:
        model = model.fold()

    masked_model, masked_data = Numerics.intersect_masks(model, data)

    ax = pylab.subplot(2, 1, 1)
    pylab.semilogy(masked_data, '-ob', label='data')
    pylab.semilogy(masked_model, '-or', label='model')

    if plot_masked:
        # .data bypasses the mask; a low zorder keeps these open-circle
        # curves underneath the unmasked ones.
        pylab.semilogy(masked_data.data, '--ob', mfc='w', zorder=-100)
        pylab.semilogy(masked_model.data, '--or', mfc='w', zorder=-100)

    ax.legend(loc='upper right')

    pylab.subplot(2, 1, 2, sharex=ax)
    if residual == 'Anscombe':
        resid = Inference.Anscombe_Poisson_residual(masked_model, masked_data)
    else:
        resid = Inference.linear_Poisson_residual(masked_model, masked_data)
    pylab.plot(resid, '-og')

    if plot_masked:
        pylab.plot(resid.data, '--og', mfc='w', zorder=-100)

    ax.set_xlim(0, data.shape[0] - 1)
    if show:
        pylab.show()
Exemple #2
0
def optimal_sfs_scaling(model, data):
    """
    Multiplicative factor that best rescales model to match data.

    Only entries that are masked in neither model nor data contribute: the
    two spectra are passed through Numerics.intersect_masks and the factor is
    the ratio of the resulting data total to the resulting model total. An
    unfolded model is folded first when data is folded.
    """
    fold_model_first = data.folded and not model.folded
    shared_model, shared_data = Numerics.intersect_masks(
        model.fold() if fold_model_first else model, data)

    data_total = shared_data.sum()
    model_total = shared_model.sum()
    return data_total / model_total
def optimal_sfs_scaling(model, data):
    """
    Scaling factor by which model should be multiplied to best match data.

    The factor is computed from the entries left unmasked in both spectra
    after Numerics.intersect_masks: sum of data divided by sum of model.
    When data is folded and model is not, model is folded beforehand.
    """
    if not data.folded or model.folded:
        comparable_model = model
    else:
        comparable_model = model.fold()

    common = Numerics.intersect_masks(comparable_model, data)
    model_common, data_common = common
    return data_common.sum() / model_common.sum()
def plot_1d_comp_Poisson(model, data, fig_num=None, residual='Anscombe',
                         plot_masked=False, show=True):
    """
    Poisson comparison between 1d model and data.

    The top panel shows data (blue) and model (red) on a log scale, the
    bottom panel (sharing the x axis) shows the residuals (green). If data is
    folded and model is not, model is folded before the comparison.

    model: 1-dimensional model SFS
    data: 1-dimensional data SFS
    fig_num: Clear and use figure fig_num for display. If None, the current
             figure is cleared and used.
    residual: 'Anscombe' for Anscombe residuals, which are more normally
              distributed for Poisson sampling. 'linear' for the linear
              residuals, which can be less biased.
    plot_masked: Additionally plots (in open circles) results for points in the
                 model or data that were masked.
    show: If True (the default, matching the previous unconditional
          behaviour), execute pylab.show command to make sure plot displays.

    Raises ValueError if residual is neither 'Anscombe' nor 'linear'. The
    check is made before any figure is cleared or drawn on.
    """
    # Fail fast: reject a bad option before clobbering the caller's figure.
    if residual not in ('Anscombe', 'linear'):
        raise ValueError("Unknown class of residual '%s'." % residual)

    # With fig_num None, pylab.clf() below acts on the current figure.
    if fig_num is not None:
        pylab.figure(fig_num, figsize=(7, 7))
    pylab.clf()

    if data.folded and not model.folded:
        model = model.fold()

    masked_model, masked_data = Numerics.intersect_masks(model, data)

    ax = pylab.subplot(2, 1, 1)
    pylab.semilogy(masked_data, '-ob')
    pylab.semilogy(masked_model, '-or')

    if plot_masked:
        # .data bypasses the mask; a low zorder keeps these open-circle
        # curves underneath the unmasked ones.
        pylab.semilogy(masked_data.data, '--ob', mfc='w', zorder=-100)
        pylab.semilogy(masked_model.data, '--or', mfc='w', zorder=-100)

    pylab.subplot(2, 1, 2, sharex=ax)
    if residual == 'Anscombe':
        resid = Inference.Anscombe_Poisson_residual(masked_model, masked_data)
    else:
        resid = Inference.linear_Poisson_residual(masked_model, masked_data)
    pylab.plot(resid, '-og')
    if plot_masked:
        pylab.plot(resid.data, '--og', mfc='w', zorder=-100)

    ax.set_xlim(0, data.shape[0] - 1)
    if show:
        pylab.show()
Exemple #5
0
def plot_3d_comp_Poisson(model, data, vmin=None, vmax=None,
                         resid_range=None, fig_num=None, pop_ids=None,
                         residual='Anscombe', adjust=True, show=True):
    """
    Poisson comparison between 3d model and data.

    The figure is a 4x3 grid with one column per summed-out axis. The rows
    hold, from top to bottom: the 2d marginal data spectrum, the 2d marginal
    model spectrum, the residuals between them, and a histogram of the
    unmasked residuals. If data is folded and model is not, model is folded
    before the comparison.

    model: 3-dimensional model SFS
    data: 3-dimensional data SFS
    vmin, vmax: Minimum and maximum values plotted for sfs are vmin and
                vmax respectively. If None, they are taken from the marginal
                spectra. vmin is also passed as the mask argument of the
                residual function.
    resid_range: Residual plot saturates at +- resid_range. If None, the
                 largest absolute residual over all three marginals is used.
    fig_num: Clear and use figure fig_num for display. If None, the current
             figure is cleared and used.
    pop_ids: If not None, override pop_ids stored in Spectrum.
    residual: 'Anscombe' for Anscombe residuals, which are more normally
              distributed for Poisson sampling. 'linear' for the linear
              residuals, which can be less biased.
    adjust: Should method use automatic 'subplots_adjust'? For advanced
            manipulation of plots, it may be useful to make this False.
    show: If True (the default, matching the previous unconditional
          behaviour), execute pylab.show command to make sure plot displays.

    Raises ValueError if residual is neither 'Anscombe' nor 'linear', or if
    pop_ids is given and is not of length 3. Both checks are made before any
    figure is cleared or drawn on.
    """
    # Fail fast: reject bad options before clobbering the caller's figure.
    if residual not in ('Anscombe', 'linear'):
        raise ValueError("Unknown class of residual '%s'." % residual)
    if pop_ids is not None and len(pop_ids) != 3:
        raise ValueError('pop_ids must be of length 3.')

    if data.folded and not model.folded:
        model = model.fold()

    masked_model, masked_data = Numerics.intersect_masks(model, data)

    # With fig_num None, pylab.clf() below acts on the current figure.
    if fig_num is not None:
        pylab.figure(fig_num, figsize=(8, 10))

    pylab.clf()
    if adjust:
        pylab.subplots_adjust(bottom=0.07, left=0.07, top=0.95, right=0.95)

    # Colour limits are based on the 2d marginals, since those are what is
    # actually drawn.
    modelmax = max(masked_model.sum(axis=sax).max() for sax in range(3))
    datamax = max(masked_data.sum(axis=sax).max() for sax in range(3))
    modelmin = min(masked_model.sum(axis=sax).min() for sax in range(3))
    datamin = min(masked_data.sum(axis=sax).min() for sax in range(3))
    max_toplot = max(modelmax, datamax)
    min_toplot = min(modelmin, datamin)

    if vmax is None:
        vmax = max_toplot
    if vmin is None:
        vmin = min_toplot
    extend = _extend_mapping[vmin <= min_toplot, vmax >= max_toplot]

    # Calculate the residuals of each marginal (residual was validated above).
    if residual == 'Anscombe':
        resid_func = Inference.Anscombe_Poisson_residual
    else:
        resid_func = Inference.linear_Poisson_residual
    resids = [resid_func(masked_model.sum(axis=2 - sax),
                         masked_data.sum(axis=2 - sax),
                         mask=vmin)
              for sax in range(3)]

    min_resid = min(r.min() for r in resids)
    max_resid = max(r.max() for r in resids)
    if resid_range is None:
        resid_range = max((abs(max_resid), abs(min_resid)))
    resid_extend = _extend_mapping[-resid_range <= min_resid,
                                   resid_range >= max_resid]

    if pop_ids is not None:
        data_ids = model_ids = resid_ids = pop_ids
    else:
        data_ids = masked_data.pop_ids
        model_ids = masked_model.pop_ids

        if model_ids is None:
            model_ids = data_ids

        # Only label the residual axes when data and model agree on the ids.
        if model_ids == data_ids:
            resid_ids = model_ids
        else:
            resid_ids = None

    for sax in range(3):
        marg_data = masked_data.sum(axis=2 - sax)
        marg_model = masked_model.sum(axis=2 - sax)

        # Drop the label of the summed-out population from each id list.
        curr_ids = []
        for ids in (data_ids, model_ids, resid_ids):
            if ids is None:
                ids = ['pop0', 'pop1', 'pop2']
            else:
                ids = list(ids)
            del ids[2 - sax]
            curr_ids.append(ids)

        ax = pylab.subplot(4, 3, sax + 1)
        # Only the right-most column carries a colorbar.
        plot_colorbar = (sax == 2)
        plot_single_2d_sfs(marg_data, vmin=vmin, vmax=vmax, pop_ids=curr_ids[0],
                           extend=extend, colorbar=plot_colorbar)

        pylab.subplot(4, 3, sax + 4, sharex=ax, sharey=ax)
        plot_single_2d_sfs(marg_model, vmin=vmin, vmax=vmax,
                           pop_ids=curr_ids[1], extend=extend, colorbar=False)

        resid = resids[sax]
        pylab.subplot(4, 3, sax + 7, sharex=ax, sharey=ax)
        plot_2d_resid(resid, resid_range, pop_ids=curr_ids[2],
                      extend=resid_extend, colorbar=plot_colorbar)

        ax = pylab.subplot(4, 3, sax + 10)
        flatresid = numpy.compress(numpy.logical_not(resid.mask.ravel()),
                                   resid.ravel())
        # 'normed' was removed from Axes.hist in Matplotlib 3.1; 'density'
        # is its replacement.
        ax.hist(flatresid, bins=20, density=True)
        ax.set_yticks([])
    if show:
        pylab.show()
Exemple #6
0
def plot_2d_comp_Poisson(model, data, vmin=None, vmax=None,
                         resid_range=None, fig_num=None,
                         pop_ids=None, residual='Anscombe',
                         adjust=True, saveplot=False,
                         nomplot="plot_2d_comp_Poisson",
                         showplot=True):
    """
    Poisson comparison between 2d model and data.

    The figure is a 2x2 grid: data, model, residuals, and a histogram of the
    unmasked residuals. If data is folded and model is not, model is folded
    before the comparison.

    model: 2-dimensional model SFS
    data: 2-dimensional data SFS
    vmin, vmax: Minimum and maximum values plotted for sfs are vmin and
                vmax respectively. If None, they are taken from the spectra.
                vmin is also passed as the mask argument of the residual
                function.
    resid_range: Residual plot saturates at +- resid_range. If None, the
                 largest absolute residual is used.
    fig_num: Clear and use figure fig_num for display. If None, the current
             figure is cleared and used.
    pop_ids: If not None, override pop_ids stored in Spectrum.
    residual: 'Anscombe' for Anscombe residuals, which are more normally
              distributed for Poisson sampling. 'linear' for the linear
              residuals, which can be less biased.
    adjust: Should method use automatic 'subplots_adjust'? For advanced
            manipulation of plots, it may be useful to make this False.
    saveplot: If True, save the figure with pylab.savefig.
    nomplot: File name used when saveplot is True; '.png' is appended to it.
    showplot: If True, execute pylab.show command to make sure plot displays.

    Raises ValueError if residual is neither 'Anscombe' nor 'linear', or if
    pop_ids is given and is not of length 2. Both checks are made before any
    figure is cleared or drawn on.
    """
    # Fail fast: reject bad options before clobbering the caller's figure.
    if residual not in ('Anscombe', 'linear'):
        raise ValueError("Unknown class of residual '%s'." % residual)
    if pop_ids is not None and len(pop_ids) != 2:
        raise ValueError('pop_ids must be of length 2.')

    if data.folded and not model.folded:
        model = model.fold()

    masked_model, masked_data = Numerics.intersect_masks(model, data)

    # With fig_num None, pylab.clf() below acts on the current figure.
    if fig_num is not None:
        pylab.figure(fig_num, figsize=(7, 7))

    pylab.clf()
    if adjust:
        pylab.subplots_adjust(bottom=0.07, left=0.07, top=0.94, right=0.95,
                              hspace=0.26, wspace=0.26)

    max_toplot = max(masked_model.max(), masked_data.max())
    min_toplot = min(masked_model.min(), masked_data.min())
    if vmax is None:
        vmax = max_toplot
    if vmin is None:
        vmin = min_toplot
    extend = _extend_mapping[vmin <= min_toplot, vmax >= max_toplot]

    if pop_ids is not None:
        data_pop_ids = model_pop_ids = resid_pop_ids = pop_ids
    else:
        data_pop_ids = masked_data.pop_ids
        model_pop_ids = masked_model.pop_ids
        if masked_model.pop_ids is None:
            model_pop_ids = data_pop_ids

        # Only label the residual axes when data and model agree on the ids.
        if model_pop_ids == data_pop_ids:
            resid_pop_ids = model_pop_ids
        else:
            resid_pop_ids = None

    ax = pylab.subplot(2, 2, 1)
    plot_single_2d_sfs(masked_data, vmin=vmin, vmax=vmax,
                       pop_ids=data_pop_ids, colorbar=False)
    ax.set_title('data')

    ax2 = pylab.subplot(2, 2, 2, sharex=ax, sharey=ax)
    plot_single_2d_sfs(masked_model, vmin=vmin, vmax=vmax,
                       pop_ids=model_pop_ids, extend=extend)
    ax2.set_title('model')

    # residual was validated above, so anything not 'Anscombe' is 'linear'.
    if residual == 'Anscombe':
        resid = Inference.Anscombe_Poisson_residual(masked_model, masked_data,
                                                    mask=vmin)
    else:
        resid = Inference.linear_Poisson_residual(masked_model, masked_data,
                                                  mask=vmin)

    if resid_range is None:
        resid_range = max((abs(resid.max()), abs(resid.min())))
    resid_extend = _extend_mapping[-resid_range <= resid.min(),
                                   resid_range >= resid.max()]

    ax3 = pylab.subplot(2, 2, 3, sharex=ax, sharey=ax)
    plot_2d_resid(resid, resid_range, pop_ids=resid_pop_ids,
                  extend=resid_extend)
    ax3.set_title('residuals')

    ax = pylab.subplot(2, 2, 4)
    flatresid = numpy.compress(numpy.logical_not(resid.mask.ravel()),
                               resid.ravel())
    # 'normed' was removed from Axes.hist in Matplotlib 3.1; 'density' is its
    # replacement.
    ax.hist(flatresid, bins=20, density=True)
    ax.set_title('residuals')
    ax.set_yticks([])
    if saveplot:
        pylab.savefig(nomplot + ".png")
    if showplot:
        pylab.show()
Exemple #7
0
def plot_2d_meta_resid(s_resid,ns_resid,resid_range=None,
                       fig_num=None, pop_ids=None, 
                       adjust=True, show=True):

    """
    Comparison between 2d nonsynonymous residual and 2d synonymous residual.

    The figure is a 2x2 grid: synonymous residuals, nonsynonymous residuals,
    the meta-residuals (synonymous minus nonsynonymous), and a histogram of
    the unmasked meta-residuals titled with their sum of squares. If ns_resid
    is folded and s_resid is not, s_resid is folded before the comparison.

    s_resid: residual SFS from synonymous data
    ns_resid: residual SFS from nonsynonymous data
    resid_range: Residual plot saturates at +- resid_range. This range applies to both
                 the residual SFS's supplied as well as the meta-residual plot.
                 If None, the largest absolute synonymous residual is used
                 for all three plots.
    fig_num: Clear and use figure fig_num for display. If None, the current
             figure is cleared and used.
    pop_ids: If not None, override pop_ids stored in Spectrum.
    adjust: Should method use automatic 'subplots_adjust'? For advanced
            manipulation of plots, it may be useful to make this False.
    show: Display the plot? False can be useful when plotting many in a loop.

    Raises ValueError if pop_ids is given and is not of length 2. The check
    is made before any figure is cleared or drawn on.
    """
    # Fail fast: reject bad options before clobbering the caller's figure.
    if pop_ids is not None and len(pop_ids) != 2:
        raise ValueError('pop_ids must be of length 2.')

    if ns_resid.folded and not s_resid.folded:
        s_resid = s_resid.fold()

    masked_s, masked_ns = Numerics.intersect_masks(s_resid,ns_resid)

    # With fig_num None, pylab.clf() below acts on the current figure.
    if fig_num is not None:
        pylab.figure(fig_num, figsize=(7,7))

    pylab.clf()
    if adjust:
        pylab.subplots_adjust(bottom=0.07, left=0.07, top=0.94, right=0.95, 
                              hspace=0.26, wspace=0.26) 

    if pop_ids is not None:
        resid_pop_ids = pop_ids
    else:
        ns_pop_ids = masked_ns.pop_ids
        s_pop_ids = masked_s.pop_ids
        if s_pop_ids is None:
            s_pop_ids = ns_pop_ids

        # Only label the axes when both inputs agree on the ids.
        if s_pop_ids == ns_pop_ids:
            resid_pop_ids = s_pop_ids
        else:
            resid_pop_ids = None

    # NOTE(review): the original repeated this 'is None' test before the
    # nonsynonymous and meta-residual panels, but those branches could never
    # run once resid_range had been set here. The effective behaviour (one
    # range, derived from the synonymous residuals, shared by all panels) is
    # kept; confirm whether per-panel auto-ranging was intended instead.
    if resid_range is None:
        resid_range = max((abs(masked_s.max()), abs(masked_s.min())))

    resid_extend = _extend_mapping[-resid_range <= masked_s.min(), 
                                   resid_range >= masked_s.max()]
    ax = pylab.subplot(2,2,1)
    plot_2d_resid(masked_s, resid_range=resid_range, pop_ids=resid_pop_ids,
                  extend=resid_extend)
    ax.set_title('Synonymous Residuals')

    resid_extend = _extend_mapping[-resid_range <= masked_ns.min(), 
                                   resid_range >= masked_ns.max()]
    ax2 = pylab.subplot(2,2,2, sharex=ax, sharey=ax)
    plot_2d_resid(masked_ns, resid_range=resid_range, pop_ids=resid_pop_ids,
                  extend=resid_extend)
    ax2.set_title('Nonsynonymous Residuals')

    resid = masked_s-masked_ns

    resid_extend = _extend_mapping[-resid_range <= resid.min(), 
                                   resid_range >= resid.max()]
    ax3 = pylab.subplot(2,2,3, sharex=ax, sharey=ax)
    plot_2d_resid(resid, resid_range, pop_ids=resid_pop_ids,
                  extend=resid_extend,cmap=pylab.cm.PuOr_r)
    ax3.set_title('Meta-residuals')

    ax = pylab.subplot(2,2,4)
    flatresid = numpy.compress(numpy.logical_not(resid.mask.ravel()), 
                               resid.ravel())
    # 'normed' was removed from Axes.hist in Matplotlib 3.1; 'density' is its
    # replacement.
    ax.hist(flatresid, bins=20, density=True,color='purple')

    # Masked entries count as zero in the sum of squares; filled(0) gives
    # that without modifying resid's underlying data in place.
    sum_squares=numpy.sum(resid.filled(0)**2)
    ax.set_title(r'$res^2$ = '+'{0:.3f}'.format(sum_squares))
    ax.set_yticks([])

    if show:
        pylab.show()
Exemple #8
0
def optimal_sfs_scaling(model,data):
    """
    Optimal multiplicative scaling factor between model and data.

    Both spectra are folded with numerics.fold, passed through
    Numerics.intersect_masks, and the ratio of the data sum to the model sum
    is returned.
    """
    # NOTE(review): this block refers to both 'numerics' and 'Numerics';
    # confirm that both names are really in scope in the enclosing module.
    data = numerics.fold(data)
    # Fold the model itself. Folding data here (as before) replaced the model
    # with a copy of the data, so the ratio below no longer depended on the
    # model at all.
    model = numerics.fold(model)
    model, data = Numerics.intersect_masks(model, data)
    return data.sum()/model.sum()