def plot_1d_comp_Poisson(model, data, fig_num=None, residual='Anscombe',
                         plot_masked=False, show=True):
    """
    Compare a 1d model SFS with a 1d data SFS under Poisson sampling.

    The upper panel draws data (blue) and model (red) on a logarithmic
    y-axis with a legend; the lower panel shares the x-axis and draws the
    residuals (green).

    model: 1-dimensional model SFS
    data: 1-dimensional data SFS
    fig_num: Number of the figure to draw into. If None, the figure
             returned by pylab.gcf() is used. In both cases the figure is
             cleared before plotting.
    residual: 'Anscombe' for Anscombe residuals, which are more normally
              distributed for Poisson sampling. 'linear' for the linear
              residuals, which can be less biased. Any other value raises
              ValueError.
    plot_masked: If True, the underlying (unmasked) values are additionally
                 drawn with dashed lines and open circles, behind the main
                 curves.
    show: If True, call pylab.show() once plotting is finished.
    """
    # Pick the target figure, then wipe whatever it currently contains.
    if fig_num is not None:
        pylab.figure(fig_num, figsize=(7, 7))
    else:
        pylab.gcf()
    pylab.clf()

    # A folded data spectrum can only be compared against a folded model.
    if data.folded and not model.folded:
        model = model.fold()

    masked_model, masked_data = Numerics.intersect_masks(model, data)

    # Upper panel: the two spectra themselves.
    top_ax = pylab.subplot(2, 1, 1)
    for spectrum, fmt, name in ((masked_data, '-ob', 'data'),
                                (masked_model, '-or', 'model')):
        pylab.semilogy(spectrum, fmt, label=name)
    if plot_masked:
        for spectrum, fmt in ((masked_data, '--ob'), (masked_model, '--or')):
            pylab.semilogy(spectrum.data, fmt, mfc='w', zorder=-100)
    top_ax.legend(loc='upper right')

    # Lower panel: residuals, x-axis tied to the upper panel.
    pylab.subplot(2, 1, 2, sharex=top_ax)
    if residual == 'Anscombe':
        residual_func = Inference.Anscombe_Poisson_residual
    elif residual == 'linear':
        residual_func = Inference.linear_Poisson_residual
    else:
        raise ValueError("Unknown class of residual '%s'." % residual)
    resid = residual_func(masked_model, masked_data)

    pylab.plot(resid, '-og')
    if plot_masked:
        pylab.plot(resid.data, '--og', mfc='w', zorder=-100)

    last_index = data.shape[0] - 1
    top_ax.set_xlim(0, last_index)

    if show:
        pylab.show()
def optimal_sfs_scaling(model, data):
    """
    Multiplicative factor that rescales model to match data.

    The factor is the sum of the data divided by the sum of the model,
    taken after Numerics.intersect_masks has been applied to the pair, so
    only entries masked in neither spectrum contribute. If data is folded
    and model is not, model is folded first.
    """
    needs_fold = data.folded and not model.folded
    comparable_model = model.fold() if needs_fold else model

    shared_model, shared_data = Numerics.intersect_masks(comparable_model,
                                                         data)
    data_total = shared_data.sum()
    model_total = shared_model.sum()
    return data_total / model_total
def optimal_sfs_scaling(model, data):
    """
    Ratio of summed data to summed model over commonly unmasked entries.

    model: model SFS; folded here when data is folded and model is not.
    data: data SFS.

    Returns data.sum() / model.sum(), computed on the pair returned by
    Numerics.intersect_masks, i.e. using only entries that are masked in
    neither spectrum.
    """
    model_is_unfolded = not model.folded
    if data.folded and model_is_unfolded:
        # Bring the model into the same (folded) representation as the data.
        model = model.fold()

    pair = Numerics.intersect_masks(model, data)
    model_common, data_common = pair
    numerator = data_common.sum()
    return numerator / model_common.sum()
def plot_1d_comp_Poisson(model, data, fig_num=None, residual='Anscombe',
                         plot_masked=False):
    """
    Two-panel Poisson comparison of a 1d model SFS and a 1d data SFS.

    Top panel: data (blue) and model (red) on a log y-axis. Bottom panel
    (shared x-axis): residuals (green). pylab.show() is always called at
    the end.

    model: 1-dimensional model SFS
    data: 1-dimensional data SFS
    fig_num: Number of the figure to draw into. If None, the figure
             returned by pylab.gcf() is used. The figure is cleared before
             plotting in either case.
    residual: 'Anscombe' for Anscombe residuals, which are more normally
              distributed for Poisson sampling. 'linear' for the linear
              residuals, which can be less biased. Any other value raises
              ValueError.
    plot_masked: If True, also draw the underlying (unmasked) values using
                 dashed lines and open circles, placed behind the main
                 curves.
    """
    use_current_figure = fig_num is None
    if use_current_figure:
        pylab.gcf()
    else:
        pylab.figure(fig_num, figsize=(7, 7))
    pylab.clf()

    # Fold the model when only the data is folded, so both are comparable.
    if data.folded and not model.folded:
        model = model.fold()

    masked_model, masked_data = Numerics.intersect_masks(model, data)

    # Top panel: spectra.
    sfs_axes = pylab.subplot(2, 1, 1)
    pylab.semilogy(masked_data, '-ob')
    pylab.semilogy(masked_model, '-or')
    hidden_style = dict(mfc='w', zorder=-100)
    if plot_masked:
        pylab.semilogy(masked_data.data, '--ob', **hidden_style)
        pylab.semilogy(masked_model.data, '--or', **hidden_style)

    # Bottom panel: residuals.
    pylab.subplot(2, 1, 2, sharex=sfs_axes)
    if residual == 'Anscombe':
        compute_resid = Inference.Anscombe_Poisson_residual
    elif residual == 'linear':
        compute_resid = Inference.linear_Poisson_residual
    else:
        raise ValueError("Unknown class of residual '%s'." % residual)
    resid = compute_resid(masked_model, masked_data)

    pylab.plot(resid, '-og')
    if plot_masked:
        pylab.plot(resid.data, '--og', **hidden_style)

    sfs_axes.set_xlim(0, data.shape[0] - 1)
    pylab.show()
def plot_3d_comp_Poisson(model, data, vmin=None, vmax=None, resid_range=None,
                         fig_num=None, pop_ids=None, residual='Anscombe',
                         adjust=True):
    """
    Poisson comparison between 3d model and data.

    The figure is a 4x3 grid with one column per pair of populations (the
    spectra are summed over axis 2, 1 and 0 in turn). Rows, top to bottom:
    marginal data, marginal model, residuals, histogram of the residuals.
    pylab.show() is always called at the end.

    model: 3-dimensional model SFS
    data: 3-dimensional data SFS
    vmin, vmax: Minimum and maximum values plotted for sfs are vmin and
                vmax respectively. When None, they default to the smallest
                and largest value found in any marginal spectrum.
    resid_range: Residual plot saturates at +- resid_range. When None, the
                 largest absolute residual over all three marginals is used.
    fig_num: Number of the figure to draw into. If None, the current
             figure is used. The figure is cleared in either case.
    pop_ids: If not None, override pop_ids stored in Spectrum. Must have
             length 3, otherwise ValueError is raised.
    residual: 'Anscombe' for Anscombe residuals, which are more normally
              distributed for Poisson sampling. 'linear' for the linear
              residuals, which can be less biased. Any other value raises
              ValueError.
    adjust: Should method use automatic 'subplots_adjust'? For advanced
            manipulation of plots, it may be useful to make this False.
    """
    if data.folded and not model.folded:
        model = model.fold()

    masked_model, masked_data = Numerics.intersect_masks(model, data)

    # pylab.clf() operates on the current figure, so a figure only needs to
    # be selected explicitly when the caller asked for a specific one.
    if fig_num is not None:
        pylab.figure(fig_num, figsize=(8, 10))
    pylab.clf()
    if adjust:
        pylab.subplots_adjust(bottom=0.07, left=0.07, top=0.95, right=0.95)

    # Colour limits are shared by every data/model panel, so they are taken
    # over all three marginal spectra.
    modelmax = max(masked_model.sum(axis=sax).max() for sax in range(3))
    datamax = max(masked_data.sum(axis=sax).max() for sax in range(3))
    modelmin = min(masked_model.sum(axis=sax).min() for sax in range(3))
    datamin = min(masked_data.sum(axis=sax).min() for sax in range(3))
    max_toplot = max(modelmax, datamax)
    min_toplot = min(modelmin, datamin)

    if vmax is None:
        vmax = max_toplot
    if vmin is None:
        vmin = min_toplot
    extend = _extend_mapping[vmin <= min_toplot, vmax >= max_toplot]

    # Calculate the residuals
    if residual == 'Anscombe':
        resid_func = Inference.Anscombe_Poisson_residual
    elif residual == 'linear':
        resid_func = Inference.linear_Poisson_residual
    else:
        raise ValueError("Unknown class of residual '%s'." % residual)
    resids = [resid_func(masked_model.sum(axis=2 - sax),
                         masked_data.sum(axis=2 - sax),
                         mask=vmin)
              for sax in range(3)]

    min_resid = min(r.min() for r in resids)
    max_resid = max(r.max() for r in resids)
    if resid_range is None:
        resid_range = max((abs(max_resid), abs(min_resid)))
    resid_extend = _extend_mapping[-resid_range <= min_resid,
                                   resid_range >= max_resid]

    if pop_ids is not None:
        if len(pop_ids) != 3:
            raise ValueError('pop_ids must be of length 3.')
        data_ids = model_ids = resid_ids = pop_ids
    else:
        data_ids = masked_data.pop_ids
        model_ids = masked_model.pop_ids

        if model_ids is None:
            model_ids = data_ids

        # Residual panels are only labelled when data and model agree.
        if model_ids == data_ids:
            resid_ids = model_ids
        else:
            resid_ids = None

    for sax in range(3):
        marg_data = masked_data.sum(axis=2 - sax)
        marg_model = masked_model.sum(axis=2 - sax)

        # Labels for the two populations left after summing out axis 2-sax.
        curr_ids = []
        for ids in [data_ids, model_ids, resid_ids]:
            if ids is None:
                ids = ['pop0', 'pop1', 'pop2']
            # Copy before deleting so the caller's sequence is untouched.
            ids = list(ids)
            del ids[2 - sax]
            curr_ids.append(ids)

        ax = pylab.subplot(4, 3, sax + 1)
        # Only the last column carries colorbars.
        plot_colorbar = (sax == 2)
        plot_single_2d_sfs(marg_data, vmin=vmin, vmax=vmax,
                           pop_ids=curr_ids[0], extend=extend,
                           colorbar=plot_colorbar)

        pylab.subplot(4, 3, sax + 4, sharex=ax, sharey=ax)
        plot_single_2d_sfs(marg_model, vmin=vmin, vmax=vmax,
                           pop_ids=curr_ids[1], extend=extend,
                           colorbar=False)

        resid = resids[sax]
        pylab.subplot(4, 3, sax + 7, sharex=ax, sharey=ax)
        plot_2d_resid(resid, resid_range, pop_ids=curr_ids[2],
                      extend=resid_extend, colorbar=plot_colorbar)

        ax = pylab.subplot(4, 3, sax + 10)
        # Histogram only the unmasked residuals.
        flatresid = numpy.compress(numpy.logical_not(resid.mask.ravel()),
                                   resid.ravel())
        # 'density' replaces the 'normed' keyword, which Matplotlib removed.
        ax.hist(flatresid, bins=20, density=True)
        ax.set_yticks([])

    pylab.show()
def plot_2d_comp_Poisson(model, data, vmin=None, vmax=None, resid_range=None,
                         fig_num=None, pop_ids=None, residual='Anscombe',
                         adjust=True, saveplot=False,
                         nomplot="plot_2d_comp_Poisson", showplot=True):
    """
    Poisson comparison between 2d model and data.

    Draws a 2x2 grid: data, model, residuals, and a histogram of the
    residuals.

    model: 2-dimensional model SFS
    data: 2-dimensional data SFS
    vmin, vmax: Minimum and maximum values plotted for sfs are vmin and
                vmax respectively. When None, they default to the smallest
                and largest value found in model or data.
    resid_range: Residual plot saturates at +- resid_range. When None, the
                 largest absolute residual is used.
    fig_num: Number of the figure to draw into. If None, the current
             figure is used. The figure is cleared in either case.
    pop_ids: If not None, override pop_ids stored in Spectrum. Must have
             length 2, otherwise ValueError is raised.
    residual: 'Anscombe' for Anscombe residuals, which are more normally
              distributed for Poisson sampling. 'linear' for the linear
              residuals, which can be less biased. Any other value raises
              ValueError.
    adjust: Should method use automatic 'subplots_adjust'? For advanced
            manipulation of plots, it may be useful to make this False.
    saveplot: If True, save the figure with pylab.savefig.
    nomplot: File name (without extension) used when saveplot is True;
             ".png" is appended to it.
    showplot: If True, call pylab.show() at the end.
    """
    if data.folded and not model.folded:
        model = model.fold()

    masked_model, masked_data = Numerics.intersect_masks(model, data)

    # pylab.clf() operates on the current figure, so a figure only needs to
    # be selected explicitly when the caller asked for a specific one.
    if fig_num is not None:
        pylab.figure(fig_num, figsize=(7, 7))
    pylab.clf()
    if adjust:
        pylab.subplots_adjust(bottom=0.07, left=0.07, top=0.94, right=0.95,
                              hspace=0.26, wspace=0.26)

    max_toplot = max(masked_model.max(), masked_data.max())
    min_toplot = min(masked_model.min(), masked_data.min())
    if vmax is None:
        vmax = max_toplot
    if vmin is None:
        vmin = min_toplot
    extend = _extend_mapping[vmin <= min_toplot, vmax >= max_toplot]

    if pop_ids is not None:
        data_pop_ids = model_pop_ids = resid_pop_ids = pop_ids
        if len(pop_ids) != 2:
            raise ValueError('pop_ids must be of length 2.')
    else:
        data_pop_ids = masked_data.pop_ids
        model_pop_ids = masked_model.pop_ids
        if masked_model.pop_ids is None:
            model_pop_ids = data_pop_ids

        # The residual panel is only labelled when data and model agree.
        if model_pop_ids == data_pop_ids:
            resid_pop_ids = model_pop_ids
        else:
            resid_pop_ids = None

    ax = pylab.subplot(2, 2, 1)
    plot_single_2d_sfs(masked_data, vmin=vmin, vmax=vmax,
                       pop_ids=data_pop_ids, colorbar=False)
    ax.set_title('data')

    ax2 = pylab.subplot(2, 2, 2, sharex=ax, sharey=ax)
    plot_single_2d_sfs(masked_model, vmin=vmin, vmax=vmax,
                       pop_ids=model_pop_ids, extend=extend)
    ax2.set_title('model')

    if residual == 'Anscombe':
        resid = Inference.Anscombe_Poisson_residual(masked_model, masked_data,
                                                    mask=vmin)
    elif residual == 'linear':
        resid = Inference.linear_Poisson_residual(masked_model, masked_data,
                                                  mask=vmin)
    else:
        raise ValueError("Unknown class of residual '%s'." % residual)

    if resid_range is None:
        resid_range = max((abs(resid.max()), abs(resid.min())))
    resid_extend = _extend_mapping[-resid_range <= resid.min(),
                                   resid_range >= resid.max()]

    ax3 = pylab.subplot(2, 2, 3, sharex=ax, sharey=ax)
    plot_2d_resid(resid, resid_range, pop_ids=resid_pop_ids,
                  extend=resid_extend)
    ax3.set_title('residuals')

    ax = pylab.subplot(2, 2, 4)
    # Histogram only the unmasked residuals.
    flatresid = numpy.compress(numpy.logical_not(resid.mask.ravel()),
                               resid.ravel())
    # 'density' replaces the 'normed' keyword, which Matplotlib removed.
    ax.hist(flatresid, bins=20, density=True)
    ax.set_title('residuals')
    ax.set_yticks([])

    if saveplot:
        nomplot = nomplot + ".png"
        pylab.savefig(nomplot)
    if showplot:
        pylab.show()
def plot_2d_meta_resid(s_resid, ns_resid, resid_range=None,
                       fig_num=None, pop_ids=None,
                       adjust=True, show=True):
    """
    Comparison between 2d nonsynonymous residual and 2d synonymous residual.

    Draws a 2x2 grid: synonymous residuals, nonsynonymous residuals, the
    meta-residuals (synonymous minus nonsynonymous), and a histogram of the
    meta-residuals titled with their sum of squares.

    s_resid: residual SFS from synonymous data
    ns_resid: residual SFS from nonsynonymous data
    resid_range: Residual plot saturates at +- resid_range. This range
                 applies to both the residual SFS's supplied as well as the
                 meta-residual plot. When None, the largest absolute value
                 of the synonymous residuals is used for all three panels.
    fig_num: Number of the figure to draw into. If None, the current
             figure is used. The figure is cleared in either case.
    pop_ids: If not None, override pop_ids stored in Spectrum. Must have
             length 2, otherwise ValueError is raised.
    adjust: Should method use automatic 'subplots_adjust'? For advanced
            manipulation of plots, it may be useful to make this False.
    show: Display the plot? False can be useful when plotting many in a
          loop.
    """
    if ns_resid.folded and not s_resid.folded:
        s_resid = s_resid.fold()

    masked_s, masked_ns = Numerics.intersect_masks(s_resid, ns_resid)

    # pylab.clf() operates on the current figure, so a figure only needs to
    # be selected explicitly when the caller asked for a specific one.
    if fig_num is not None:
        pylab.figure(fig_num, figsize=(7, 7))
    pylab.clf()
    if adjust:
        pylab.subplots_adjust(bottom=0.07, left=0.07, top=0.94, right=0.95,
                              hspace=0.26, wspace=0.26)

    if pop_ids is not None:
        ns_pop_ids = s_pop_ids = resid_pop_ids = pop_ids
        if len(pop_ids) != 2:
            raise ValueError('pop_ids must be of length 2.')
    else:
        ns_pop_ids = masked_ns.pop_ids
        s_pop_ids = masked_s.pop_ids
        if masked_s.pop_ids is None:
            s_pop_ids = ns_pop_ids

        # Panels are only labelled when both inputs agree on the labels.
        if s_pop_ids == ns_pop_ids:
            resid_pop_ids = s_pop_ids
        else:
            resid_pop_ids = None

    # One range is shared by all three panels. Once it has been derived
    # from the synonymous residuals it is never None again, so it is not
    # re-derived for the later panels; only the colorbar 'extend' setting
    # is recomputed per panel.
    if resid_range is None:
        resid_range = max((abs(masked_s.max()), abs(masked_s.min())))

    resid_extend = _extend_mapping[-resid_range <= masked_s.min(),
                                   resid_range >= masked_s.max()]
    ax = pylab.subplot(2, 2, 1)
    plot_2d_resid(masked_s, resid_range=resid_range, pop_ids=resid_pop_ids,
                  extend=resid_extend)
    ax.set_title('Synonymous Residuals')

    resid_extend = _extend_mapping[-resid_range <= masked_ns.min(),
                                   resid_range >= masked_ns.max()]
    ax2 = pylab.subplot(2, 2, 2, sharex=ax, sharey=ax)
    plot_2d_resid(masked_ns, resid_range=resid_range, pop_ids=resid_pop_ids,
                  extend=resid_extend)
    ax2.set_title('Nonsynonymous Residuals')

    resid = masked_s - masked_ns
    resid_extend = _extend_mapping[-resid_range <= resid.min(),
                                   resid_range >= resid.max()]
    ax3 = pylab.subplot(2, 2, 3, sharex=ax, sharey=ax)
    plot_2d_resid(resid, resid_range, pop_ids=resid_pop_ids,
                  extend=resid_extend, cmap=pylab.cm.PuOr_r)
    ax3.set_title('Meta-residuals')

    ax = pylab.subplot(2, 2, 4)
    # Histogram only the unmasked meta-residuals.
    flatresid = numpy.compress(numpy.logical_not(resid.mask.ravel()),
                               resid.ravel())
    # 'density' replaces the 'normed' keyword, which Matplotlib removed.
    ax.hist(flatresid, bins=20, density=True, color='purple')

    # Zero the masked entries so they do not contribute to the sum of
    # squares. `resid` is local to this function, so the in-place edit
    # is not visible to the caller.
    resid.data[resid.mask == True] = 0
    sum_squares = numpy.sum(resid.data**2)
    ax.set_title(r'$res^2$ = ' + '{0:.3f}'.format(sum_squares))
    ax.set_yticks([])

    if show:
        pylab.show()
def optimal_sfs_scaling(model, data):
    """
    Multiplicative scaling factor between model and data, after folding.

    Both spectra are folded with numerics.fold, then passed through
    Numerics.intersect_masks, and the ratio of the summed data to the
    summed model is returned.

    model: model SFS
    data: data SFS

    NOTE(review): this block uses both `numerics` (for fold) and `Numerics`
    (for intersect_masks); confirm that both names are intended and in
    scope in this module.
    """
    data = numerics.fold(data)
    # Fold the model, not the data: folding `data` into `model` would make
    # numerator and denominator identical and discard the model entirely.
    model = numerics.fold(model)
    model, data = Numerics.intersect_masks(model, data)
    return data.sum() / model.sum()