def main():
    parser = OptionParser(description='Fitting to noisy data generated by a known function')
    parser.add_option("--npoints", type="int", help="number of data points")
    parser.add_option("--low", type="float", help="smallest data point")
    parser.add_option("--high", type="float", help="highest data point")
    parser.add_option("--sigma", type="float", help="std of noise")
    (options, args) = parser.parse_args()

    pl.figure(1, (7, 6))
    ax = pl.subplot(1, 1, 1)
    pl.connect('key_press_event', kevent.press)

    sigma = options.sigma
    Ls = np.append(np.linspace(options.low, options.high, options.npoints), 46)
    nLs = np.linspace(min(Ls), max(Ls), 100)

    # Generate data from the known "half-log" form and add Gaussian noise
    Mis = HalfLog(Ls, .5, 0.5)
    errs = np.random.normal(0, sigma, len(Mis))
    Mis = Mis + errs
    pl.errorbar(Ls, Mis, errs, ls='', marker='s', color='b')
    print(sigma / Mis)

    # Fit with a free logarithmic prefactor
    coeff, var_matrix = curve_fit(FreeLog, Ls, Mis, (1.0, 1.0, 1.0))
    err = np.sqrt(np.diagonal(var_matrix))
    dof = len(Ls) - len(coeff)
    chisq = sum(((Mis - FreeLog(Ls, coeff[0], coeff[1], coeff[2])) / sigma)**2)
    cdf = special.chdtrc(dof, chisq)
    print('Free: a = %0.2f(%0.2f); b = %0.2f(%0.2f); c = %0.2f(%0.2f); p-value = %0.2f'
          % (coeff[0], err[0], coeff[1], err[1], coeff[2], err[2], cdf))
    pl.plot(nLs, FreeLog(nLs, coeff[0], coeff[1], coeff[2]), label='Free', color='y')

    # Fit with no logarithmic term
    coeff, var_matrix = curve_fit(ZeroLog, Ls, Mis, (1.0, 1.0))
    err = np.sqrt(np.diagonal(var_matrix))
    dof = len(Ls) - len(coeff)
    chisq = sum(((Mis - ZeroLog(Ls, coeff[0], coeff[1])) / sigma)**2)
    cdf = special.chdtrc(dof, chisq)
    print('Zero: a = %0.2f(%0.2f); c = %0.2f(%0.2f); p-value = %0.2f'
          % (coeff[0], err[0], coeff[1], err[1], cdf))
    pl.plot(nLs, ZeroLog(nLs, coeff[0], coeff[1]), label='Zero', color='g')

    # Fit with the log prefactor fixed to one half (the true model)
    coeff, var_matrix = curve_fit(HalfLog, Ls, Mis, (1.0, 1.0))
    err = np.sqrt(np.diagonal(var_matrix))
    dof = len(Ls) - len(coeff)
    chisq = sum(((Mis - HalfLog(Ls, coeff[0], coeff[1])) / sigma)**2)
    cdf = special.chdtrc(dof, chisq)
    print('Half: a = %0.2f(%0.2f); c = %0.2f(%0.2f); p-value = %0.2f'
          % (coeff[0], err[0], coeff[1], err[1], cdf))
    pl.plot(nLs, HalfLog(nLs, coeff[0], coeff[1]), label='Half', color='b')

    # Fit with the log prefactor fixed to unity
    coeff, var_matrix = curve_fit(OneLog, Ls, Mis, (1.0, 1.0))
    err = np.sqrt(np.diagonal(var_matrix))
    dof = len(Ls) - len(coeff)
    chisq = sum(((Mis - OneLog(Ls, coeff[0], coeff[1])) / sigma)**2)
    cdf = special.chdtrc(dof, chisq)
    print('Unity: a = %0.2f(%0.2f); c = %0.2f(%0.2f); p-value = %0.2f'
          % (coeff[0], err[0], coeff[1], err[1], cdf))
    pl.plot(nLs, OneLog(nLs, coeff[0], coeff[1]), label='Unity', color='r')

    pl.tight_layout()
    pl.legend()
    pl.show()
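# The model functions used above (FreeLog, ZeroLog, HalfLog, OneLog) are not
# defined in this snippet. A minimal sketch, assuming finite-size forms of the
# type a + (prefactor)*log(L)/L + c/L suggested by the fit labels and by the
# parameter counts passed to curve_fit; the exact forms are an assumption:
import numpy as np

def FreeLog(L, a, b, c):
    # free logarithmic prefactor b
    return a + b * np.log(L) / L + c / L

def ZeroLog(L, a, c):
    # no logarithmic term
    return a + c / L

def HalfLog(L, a, c):
    # prefactor fixed to 1/2
    return a + 0.5 * np.log(L) / L + c / L

def OneLog(L, a, c):
    # prefactor fixed to 1
    return a + 1.0 * np.log(L) / L + c / L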
def ejer5():
    sample = [6, 7, 3, 4, 7, 3, 7, 2, 6, 3, 7, 8, 2, 1, 3, 5, 8, 7]
    n = len(sample)
    simulado, t = ejer5ext1(sample, 1000, 9, n)
    print("simulated p-value: {}".format(simulado))
    # 7 degrees of freedom: k = 9 bins and m = 1 estimated parameter
    print("chi-squared p-value: {}".format(chdtrc(7, t)))
def chisquare(f_obs, f_exp):
    """Fast replacement for scipy.stats.chisquare.

    Version from https://github.com/scipy/scipy/pull/2525 with
    additional optimizations.
    """
    f_obs = np.asarray(f_obs, dtype=np.float64)
    k = len(f_obs)
    # Reuse f_obs for the chi-squared statistic
    chisq = f_obs
    chisq -= f_exp
    chisq **= 2
    with np.errstate(invalid="ignore"):
        chisq /= f_exp
    chisq = chisq.sum(axis=0)
    pvalue = special.chdtrc(k - 1, chisq)
    return pvalue
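# Usage sketch for the chisquare() replacement above (counts invented for
# illustration). Note that the function reuses its first argument in place,
# so passing an existing float64 array mutates it; lists are safe:
observed = [18, 22, 20, 25, 15]
expected = [20.0, 20.0, 20.0, 20.0, 20.0]
p = chisquare(observed, expected)  # p-value with k - 1 = 4 degrees of freedom
print(p)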
def test_p_value2(self):
    obs = np.array([[10, 0], [0, 10]])
    ct = stats.ContingencyTable(observed=obs)
    xpd = stats.expected_nway(obs)
    chi2 = ((np.abs(obs - xpd) - 0.5)**2 / xpd).sum()
    p = special.chdtrc(ct.dof, chi2)
    assert_equal(ct.dof, 1)
    assert_equal(ct.p_value(correction=True), p)
def pValor(v, T):
    """Compute the right-tail p-value P(chi2(v) >= T).

    v = degrees of freedom.
    T = test statistic.

    Integrates the chi-squared density with v degrees of freedom
    from T to infinity.
    """
    return chdtrc(v, T)
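# Example: the 5% critical value of the chi-squared distribution with 7
# degrees of freedom is about 14.07, so the right-tail area there is ~0.05.
print(pValor(7, 14.07))  # ~0.05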
def one_way(data, n):
    dt = data.dtype  # was `cells.dtype`, but `cells` is undefined in this function
    term = data.astype('float64')
    no_term = n - term
    t_exp = np.mean(term, 0)
    t_exp = np.array([t_exp, ] * data.shape[0])
    nt_exp = n - t_exp
    t_mss = (term - t_exp)**2 / t_exp
    nt_mss = (no_term - nt_exp)**2 / nt_exp
    chi2 = t_mss + nt_mss
    return special.chdtrc(1, chi2).astype(dt)
def two_way(cells):
    dt = cells.dtype
    cells = cells.astype('float64')  # make sure we don't overflow
    total = np.apply_over_axes(np.sum, cells, [1, 2]).ravel()
    chi_sq = np.zeros(cells.shape, dtype='float64')
    for i in range(2):
        for j in range(2):
            exp = np.sum(cells[:, i, :], 1).ravel() * np.sum(cells[:, :, j], 1).ravel() / total
            chi_sq[:, i, j] = (cells[:, i, j] - exp)**2 / exp
    chi_sq = np.apply_over_axes(np.sum, chi_sq, [1, 2]).ravel()
    return special.chdtrc(1, chi_sq).astype(dt)
def _chisquare(f_obs, f_exp):
    f_obs = np.asarray(f_obs, dtype=np.float64)
    k = len(f_obs)
    # Reuse f_obs for the chi-squared statistic
    chisq = f_obs
    chisq -= f_exp
    chisq **= 2
    chisq /= f_exp
    chisq = chisq.sum(axis=0)  # the statistic is a sum over cells (was `.max`, which does not match chdtrc(k - 1, ...))
    return chisq, special.chdtrc(k - 1, chisq)
def _chisquare(f_obs, f_exp):
    f_obs = np.asarray(f_obs, dtype=np.float64)
    k = len(f_obs)
    # Reuse f_obs for the chi-squared statistic
    chisq = f_obs
    chisq -= f_exp
    chisq **= 2
    with np.errstate(invalid="ignore"):
        chisq /= f_exp
    chisq = chisq.sum(axis=0)
    return chisq, special.chdtrc(k - 1, chisq)
def two_way(cells):
    """Two-way chi-square test of independence.

    Takes a 3D array as input: N(voxels) x 2 x 2, where the last two
    dimensions are the contingency table for each of N voxels.
    Returns an array of p-values.
    """
    cells = cells.astype("float64")  # make sure we don't overflow
    total = np.apply_over_axes(np.sum, cells, [1, 2]).ravel()
    chi_sq = np.zeros(cells.shape, dtype="float64")
    for i in range(2):
        for j in range(2):
            exp = np.sum(cells[:, i, :], 1).ravel() * np.sum(cells[:, :, j], 1).ravel() / total
            chi_sq[:, i, j] = (cells[:, i, j] - exp) ** 2 / exp
    chi_sq = np.apply_over_axes(np.sum, chi_sq, [1, 2]).ravel()
    return special.chdtrc(1, chi_sq)
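# Usage sketch for two_way() with a small synthetic batch of contingency
# tables (counts invented for illustration):
import numpy as np
from scipy import special

cells = np.array([
    [[10, 5], [4, 11]],   # voxel 1: 2x2 table
    [[8, 8], [7, 9]],     # voxel 2
    [[20, 2], [3, 15]],   # voxel 3
])
p_values = two_way(cells)  # one p-value (df = 1) per voxel
print(p_values)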
def calc_chisquare(self):
    # Chi-square goodness of fit between actual and expected counts.
    obs = np.reshape(self.act, -1)
    exp = np.reshape(self.exp, -1)
    oesd = np.zeros(len(obs))
    for i in range(len(obs)):
        if obs[i] <= 1e-8 and exp[i] <= 1e-8:
            # Both effectively zero: no contribution
            oesd[i] = 0
        elif exp[i] <= 1e-8:
            # Observed counts where none were expected
            oesd[i] = self.dead * obs[i] ** 2
        else:
            oesd[i] = (obs[i] - exp[i]) ** 2 / exp[i]
    stat = np.sum(oesd)
    dof = self.get_num_params()
    self.p_val = chdtrc(dof, stat)
def chi2_contingency(observed, correction=True):
    # Add a small offset so that no expected frequency is exactly zero
    observed = np.asarray(observed) + 0.0001
    if np.any(observed < 0):
        raise ValueError("All values in `observed` must be nonnegative.")
    if observed.size == 0:
        raise ValueError("No data; `observed` has size 0.")
    expected = expected_freq(observed)
    dof = expected.size - sum(expected.shape) + expected.ndim - 1
    if dof == 0:
        chi2 = 0.0
        p = 1.0
    else:
        chi2 = ((observed - expected) ** 2 / expected).sum()
        p = special.chdtrc(dof, chi2)
    return chi2, p, dof, expected
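# Because of the small additive offset, this chi2_contingency() variant does
# not raise on tables with zero cells (unlike the scipy version). A usage
# sketch, assuming expected_freq is imported from scipy.stats.contingency:
import numpy as np

obs = np.array([[10, 0], [0, 10]])
chi2, p, dof, expected = chi2_contingency(obs)
print(chi2, p, dof)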
def _chisquare(f_obs, f_exp):
    """Fast replacement for scipy.stats.chisquare.

    Version from https://github.com/scipy/scipy/pull/2525 with
    additional optimizations.
    """
    f_obs = np.asarray(f_obs, dtype=np.float64)
    k = len(f_obs)
    # Reuse f_obs for the chi-squared statistic
    chisq = f_obs
    chisq -= f_exp
    chisq **= 2
    chisq /= f_exp
    chisq = chisq.sum(axis=0)
    return chisq, special.chdtrc(k - 1, chisq)
def _chisquare(f_exp, f_obs):
    """Fast replacement for scipy.stats.chisquare.

    Version from https://github.com/scipy/scipy/pull/2525 with
    additional optimizations.

    Note that, unlike scipy.stats.chisquare, the expected frequencies
    come first in this variant's argument list.
    """
    assert np.issubdtype(f_obs.dtype, np.floating)
    k = len(f_obs)
    # Reuse f_obs (modified in place) for the chi-squared statistic
    chisq = f_obs
    chisq -= f_exp
    chisq **= 2
    chisq /= f_exp
    chisq = chisq.sum(axis=0)
    return chisq, special.chdtrc(k - 1, chisq)
def plot_residuums(self, xdata, ydata, sigma=None, *, box=False, **plotopts):
    y = ydata - self.apply(xdata)
    plt.errorbar(xdata, y, yerr=sigma, **plotopts)
    plt.axhline(0, color="gray")
    if box:
        text = r"$ \chi^2 $ / ndf = " + number.formatNumber(self.chisq) + " / " \
               + number.formatNumber(self.ndf)
        text += "\n" + "p = " + number.formatNumber(chdtrc(self.ndf, self.chisq))
        info_box(text, location=box)
def one_way(data, n):
    """One-way chi-square test of independence.

    Takes a 1D array as input and compares activation at each voxel to
    the proportion expected under a uniform distribution throughout the
    array. Note that if you're testing activation with this, make sure
    that only valid voxels (e.g., in-mask gray matter voxels) are
    included in the array, or results won't make any sense!
    """
    term = data.astype('float64')
    no_term = n - term
    t_exp = np.mean(term, 0)
    t_exp = np.array([t_exp, ] * data.shape[0])
    nt_exp = n - t_exp
    t_mss = (term - t_exp) ** 2 / t_exp
    nt_mss = (no_term - nt_exp) ** 2 / nt_exp
    chi2 = t_mss + nt_mss
    return special.chdtrc(1, chi2)
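# Usage sketch for one_way() with invented counts: each entry of `data` is the
# number of studies (out of n) activating a voxel, and each voxel is compared
# against the mean activation rate across voxels:
import numpy as np
from scipy import special

data = np.array([12, 30, 18, 25, 5])  # activation counts per voxel
n = 50                                # total number of studies
p_values = one_way(data, n)           # one p-value (df = 1) per voxel
print(p_values)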
def two_way(cells):
    """Two-way chi-square test of independence.

    Takes a 3D array as input: N(voxels) x 2 x 2, where the last two
    dimensions are the contingency table for each of N voxels.
    Returns an array of p-values.
    """
    # Mute divide-by-zero warning for bad voxels since we account for that later
    warnings.simplefilter("ignore", RuntimeWarning)
    cells = cells.astype('float64')  # make sure we don't overflow
    total = np.apply_over_axes(np.sum, cells, [1, 2]).ravel()
    chi_sq = np.zeros(cells.shape, dtype='float64')
    for i in range(2):
        for j in range(2):
            exp = np.sum(cells[:, i, :], 1).ravel() * np.sum(cells[:, :, j], 1).ravel() / total
            bad_vox = np.where(exp == 0)[0]
            chi_sq[:, i, j] = (cells[:, i, j] - exp)**2 / exp
            chi_sq[bad_vox, i, j] = 1.0  # set p-value for invalid voxels to 1
    chi_sq = np.apply_over_axes(np.sum, chi_sq, [1, 2]).ravel()
    return special.chdtrc(1, chi_sq)
def chisqprob(chisq, df):
    """Probability value (1-tail) for the Chi^2 probability distribution.

    Broadcasting rules apply.

    Parameters
    ----------
    chisq : array_like or float > 0
    df : array_like or float, probably int >= 1

    Returns
    -------
    chisqprob : ndarray
        The area from `chisq` to infinity under the Chi^2 probability
        distribution with degrees of freedom `df`.
    """
    return special.chdtrc(df, chisq)
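# Sanity check: chisqprob() agrees with the chi-squared survival function.
from scipy import special, stats

x, df = 14.07, 7
print(special.chdtrc(df, x))  # right-tail probability
print(stats.chi2.sf(x, df))   # same value via the survival function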
def p_value(self, correction=False):
    """Compute the p-value associated with a chi-square statistic.

    Parameters
    ----------
    correction : bool
        If `correction` is True, and the number of degrees of freedom is 1,
        use Yates' correction for continuity.

    Returns
    -------
    p : float
        The p-value associated with the chi-square statistic.
    """
    # Check and see if the chi2 value has already been computed.
    # If not, call chi_square()
    if not hasattr(self, "chi2"):
        self.chi_square(correction)
    # Compute the p-value from the degrees of freedom and the chi-square value.
    self.p = special.chdtrc(self.dof, self.chi2)
    return self.p
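# Minimal call pattern for p_value(), assuming the ContingencyTable class
# exercised by the tests above:
import numpy as np

obs = np.array([[10, 0], [0, 10]])
ct = stats.ContingencyTable(observed=obs)
print(ct.p_value(correction=True))  # Yates-corrected p-value, df = 1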
def neurosynth_decode(
    coordinates,
    annotations,
    ids,
    ids2=None,
    feature_group=None,
    features=None,
    frequency_threshold=0.001,
    prior=0.5,
    u=0.05,
    correction="bh",
):
    """Perform discrete functional decoding according to Neurosynth's meta-analytic method.

    This does not employ correlations between unthresholded maps, which are
    the method of choice for decoding within Neurosynth and Neurovault.
    Metadata (i.e., feature labels) for studies within the selected sample
    (`ids`) are compared to the unselected studies remaining in the database
    (`dataset`).

    Neurosynth was described in :footcite:t:`yarkoni2011large`.

    Parameters
    ----------
    coordinates : :class:`pandas.DataFrame`
        DataFrame containing coordinates. Must include a column named 'id'
        and must have a separate row for each reported peak coordinate for
        each study (i.e., there are multiple rows per ID).
        IDs from ``coordinates`` must match those from ``annotations``.
    annotations : :class:`pandas.DataFrame`
        DataFrame containing labels. Must include a column named 'id' and
        each row must correspond to a study. Other columns may correspond to
        individual labels. IDs from ``annotations`` must match those from
        ``coordinates``.
    ids : :obj:`list`
        Subset of studies in coordinates/annotations dataframes indicating
        the target for decoding. Examples include studies reporting at least
        one peak in an ROI, or studies selected from a clustering analysis.
    ids2 : :obj:`list` or None, optional
        Second subset of studies, representing "unselected" studies. If None,
        then all studies in coordinates/annotations dataframes **not** in
        ``ids`` will be used.
    features : :obj:`list`, optional
        List of features in dataset annotations to use for decoding.
        Default is None, which uses all features available.
    frequency_threshold : :obj:`float`, optional
        Threshold to apply to dataset annotations. Values greater than or
        equal to the threshold are assigned as label+, while values below
        the threshold are considered label-. Default is 0.001.
    prior : :obj:`float`, optional
        Uniform prior probability of each label being active in a study in
        the absence of evidence (labels or selection) from the study.
        Default is 0.5 (50%).
    u : :obj:`float`, optional
        Alpha level for multiple comparisons correction. Default is 0.05.
    correction : {None, "bh", "by", "bonferroni"}, optional
        Multiple comparisons correction method to apply.
        Default is 'bh' (Benjamini-Hochberg FDR correction).

    Returns
    -------
    out_df : :class:`pandas.DataFrame`
        Table with each label and the following values associated with each
        label: 'pForward', 'zForward', 'probForward', 'pReverse', 'zReverse',
        and 'probReverse'.

    See Also
    --------
    :class:`~nimare.decode.discrete.NeurosynthDecoder`: The associated class for this method.
    :func:`~nimare.decode.continuous.CorrelationDecoder`: The correlation-based decoding
        method employed in Neurosynth and NeuroVault.

    References
    ----------
    .. footbibliography::
    """
    dataset_ids = sorted(list(set(coordinates["id"].values)))
    if ids2 is None:
        unselected = sorted(list(set(dataset_ids) - set(ids)))
    else:
        unselected = ids2[:]

    # Binarize with frequency threshold
    features_df = annotations.set_index("id", drop=True)
    features_df = features_df[features].ge(frequency_threshold)

    sel_array = features_df.loc[ids].values
    unsel_array = features_df.loc[unselected].values

    n_selected = len(ids)
    n_unselected = len(unselected)

    n_selected_term = np.sum(sel_array, axis=0)
    n_unselected_term = np.sum(unsel_array, axis=0)

    n_selected_noterm = n_selected - n_selected_term
    n_unselected_noterm = n_unselected - n_unselected_term

    n_term = n_selected_term + n_unselected_term
    n_noterm = n_selected_noterm + n_unselected_noterm

    p_term = n_term / (n_term + n_noterm)

    p_selected_g_term = n_selected_term / n_term
    p_selected_g_noterm = n_selected_noterm / n_noterm

    # Recompute conditions with empirically derived prior (or inputted one)
    if prior is None:
        # if this is used, p_term_g_selected_prior = p_selected (regardless of term)
        prior = p_term

    # Significance testing
    # One-way chi-square test for consistency of term frequency across terms
    chi2_fi = one_way(n_selected_term, n_term)
    p_fi = special.chdtrc(1, chi2_fi)
    sign_fi = np.sign(n_selected_term - np.mean(n_selected_term)).ravel()

    # Two-way chi-square test for specificity of activation
    cells = np.array([
        [n_selected_term, n_selected_noterm],
        [n_unselected_term, n_unselected_noterm],
    ]).T
    chi2_ri = two_way(cells)
    p_ri = special.chdtrc(1, chi2_ri)
    sign_ri = np.sign(p_selected_g_term - p_selected_g_noterm).ravel()

    # Multiple comparisons correction across terms. Separately done for FI and RI.
    if correction in ("bh", "by"):
        p_corr_fi = fdr(p_fi, alpha=u, method=correction)
        p_corr_ri = fdr(p_ri, alpha=u, method=correction)
    elif correction == "bonferroni":
        p_corr_fi = bonferroni(p_fi)
        p_corr_ri = bonferroni(p_ri)
    else:
        p_corr_fi = p_fi
        p_corr_ri = p_ri

    # Compute z-values
    z_corr_fi = p_to_z(p_corr_fi, "two") * sign_fi
    z_corr_ri = p_to_z(p_corr_ri, "two") * sign_ri

    # Effect size
    # est. prob. of brain state described by term finding activation in ROI
    p_selected_g_term_g_prior = prior * p_selected_g_term + (1 - prior) * p_selected_g_noterm

    # est. prob. of activation in ROI reflecting brain state described by term
    p_term_g_selected_g_prior = p_selected_g_term * prior / p_selected_g_term_g_prior

    arr = np.array([
        p_corr_fi,
        z_corr_fi,
        p_selected_g_term_g_prior,
        p_corr_ri,
        z_corr_ri,
        p_term_g_selected_g_prior,
    ]).T

    out_df = pd.DataFrame(
        data=arr,
        index=features,
        columns=["pForward", "zForward", "probForward", "pReverse", "zReverse", "probReverse"],
    )
    out_df.index.name = "Term"
    return out_df
def brainmap_decode(
    coordinates,
    annotations,
    ids,
    ids2=None,
    features=None,
    frequency_threshold=0.001,
    u=0.05,
    correction="bh",
):
    """Perform image-to-text decoding for discrete inputs according to the BrainMap method.

    This method was described in :footcite:t:`amft2015definition`.

    Parameters
    ----------
    coordinates : :class:`pandas.DataFrame`
        DataFrame containing coordinates. Must include a column named 'id'
        and must have a separate row for each reported peak coordinate for
        each study (i.e., there are multiple rows per ID).
        IDs from ``coordinates`` must match those from ``annotations``.
    annotations : :class:`pandas.DataFrame`
        DataFrame containing labels. Must include a column named 'id' and
        each row must correspond to a study. Other columns may correspond to
        individual labels. IDs from ``annotations`` must match those from
        ``coordinates``.
    ids : :obj:`list`
        Subset of studies in coordinates/annotations dataframes indicating
        the target for decoding. Examples include studies reporting at least
        one peak in an ROI, or studies selected from a clustering analysis.
    ids2 : :obj:`list` or None, optional
        Second subset of studies, representing "unselected" studies. If None,
        then all studies in coordinates/annotations dataframes **not** in
        ``ids`` will be used.
    features : :obj:`list`, optional
        List of features in dataset annotations to use for decoding.
        Default is None, which uses all features available.
    frequency_threshold : :obj:`float`, optional
        Threshold to apply to dataset annotations. Values greater than or
        equal to the threshold are assigned as label+, while values below
        the threshold are considered label-. Default is 0.001.
    u : :obj:`float`, optional
        Alpha level for multiple comparisons correction. Default is 0.05.
    correction : {None, "bh", "by", "bonferroni"}, optional
        Multiple comparisons correction method to apply.
        Default is 'bh' (Benjamini-Hochberg FDR correction).

    Returns
    -------
    out_df : :class:`pandas.DataFrame`
        Table with each label and the following values associated with each
        label: 'pForward', 'zForward', 'likelihoodForward', 'pReverse',
        'zReverse', and 'probReverse'.

    See Also
    --------
    :class:`~nimare.decode.discrete.BrainMapDecoder`: The associated class for this method.

    References
    ----------
    .. footbibliography::
    """
    dataset_ids = sorted(list(set(coordinates["id"].values)))
    if ids2 is None:
        unselected = sorted(list(set(dataset_ids) - set(ids)))
    else:
        unselected = ids2[:]

    # Binarize with frequency threshold
    features_df = annotations.set_index("id", drop=True)
    features_df = features_df[features].ge(frequency_threshold)

    sel_array = features_df.loc[ids].values
    unsel_array = features_df.loc[unselected].values

    n_selected = len(ids)
    n_unselected = len(unselected)

    # The number of times any term is used (e.g., if one experiment uses
    # two terms, that counts twice).
    n_exps_across_terms = np.sum(np.sum(features_df))

    n_selected_term = np.sum(sel_array, axis=0)
    n_unselected_term = np.sum(unsel_array, axis=0)

    n_selected_noterm = n_selected - n_selected_term
    n_unselected_noterm = n_unselected - n_unselected_term

    n_term = n_selected_term + n_unselected_term
    p_term = n_term / n_exps_across_terms

    n_foci_in_database = coordinates.shape[0]
    p_selected = n_selected / n_foci_in_database

    # Count foci for studies with and without each term
    n_term_foci = np.zeros(len(features))
    n_noterm_foci = np.zeros(len(features))
    for i, term in enumerate(features):
        term_ids = features_df.loc[features_df[term] == 1].index.values
        noterm_ids = features_df.loc[features_df[term] == 0].index.values
        n_term_foci[i] = coordinates["id"].isin(term_ids).sum()
        n_noterm_foci[i] = coordinates["id"].isin(noterm_ids).sum()

    p_selected_g_term = n_selected_term / n_term_foci  # probForward
    l_selected_g_term = p_selected_g_term / p_selected  # likelihoodForward
    p_selected_g_noterm = n_selected_noterm / n_noterm_foci

    p_term_g_selected = p_selected_g_term * p_term / p_selected  # probReverse
    p_term_g_selected = p_term_g_selected / np.nansum(p_term_g_selected)  # normalize

    # Significance testing
    # Forward inference significance is determined with a binomial distribution
    p_fi = 1 - binom.cdf(k=n_selected_term, n=n_term_foci, p=p_selected)
    sign_fi = np.sign(n_selected_term - np.mean(n_selected_term)).ravel()

    # Two-way chi-square test for specificity of activation
    cells = np.array([
        [n_selected_term, n_selected_noterm],
        [n_unselected_term, n_unselected_noterm],
    ]).T
    chi2_ri = two_way(cells)
    p_ri = special.chdtrc(1, chi2_ri)
    sign_ri = np.sign(p_selected_g_term - p_selected_g_noterm).ravel()

    # Ignore rare features
    p_fi[n_selected_term < 5] = 1.0
    p_ri[n_selected_term < 5] = 1.0

    # Multiple comparisons correction across features. Separately done for FI and RI.
    if correction in ("bh", "by"):
        p_corr_fi = fdr(p_fi, alpha=u, method=correction)
        p_corr_ri = fdr(p_ri, alpha=u, method=correction)
    elif correction == "bonferroni":
        p_corr_fi = bonferroni(p_fi)
        p_corr_ri = bonferroni(p_ri)
    else:
        p_corr_fi = p_fi
        p_corr_ri = p_ri

    # Compute z-values
    z_corr_fi = p_to_z(p_corr_fi, "two") * sign_fi
    z_corr_ri = p_to_z(p_corr_ri, "two") * sign_ri

    # Effect size
    arr = np.array([
        p_corr_fi,
        z_corr_fi,
        l_selected_g_term,
        p_corr_ri,
        z_corr_ri,
        p_term_g_selected,
    ]).T

    out_df = pd.DataFrame(
        data=arr,
        index=features,
        columns=["pForward", "zForward", "likelihoodForward", "pReverse", "zReverse", "probReverse"],
    )
    out_df.index.name = "Term"
    return out_df
def pValorChi(v, T):
    return chdtrc(v, T)
def chi2_contingency(observed, correction=True):
    """Chi-square test of independence of variables in a contingency table.

    This function computes the chi-square statistic and p-value for the
    hypothesis test of independence of the observed frequencies in the
    contingency table [1]_ `observed`.  The expected frequencies are computed
    based on the marginal sums under the assumption of independence; see
    scipy.stats.expected_freq.  The number of degrees of freedom is
    (expressed using numpy functions and attributes)::

        dof = observed.size - sum(observed.shape) + observed.ndim - 1

    Parameters
    ----------
    observed : array_like
        The contingency table.  The table contains the observed frequencies
        (i.e. number of occurrences) in each category.  In the
        two-dimensional case, the table is often described as an
        "R x C table".
    correction : bool, optional
        If True, *and* the degrees of freedom is 1, apply Yates' correction
        for continuity.

    Returns
    -------
    chi2 : float
        The chi-square test statistic.  Without the Yates' correction, this
        is the sum of the squares of the observed values minus the expected
        values, divided by the expected values.  With Yates' correction,
        0.5 is subtracted from the squared differences before dividing by
        the expected values.
    p : float
        The p-value of the test
    dof : int
        Degrees of freedom
    expected : ndarray, same shape as `observed`
        The expected frequencies, based on the marginal sums of the table.

    See Also
    --------
    contingency.expected_freq
    fisher_exact
    chisquare

    Notes
    -----
    An often quoted guideline for the validity of this calculation is that
    the test should be used only if the observed and expected frequency in
    each cell is at least 5.

    This is a test for the independence of different categories of a
    population.  The test is only meaningful when the dimension of
    `observed` is two or more.  Applying the test to a one-dimensional
    table will always result in `expected` equal to `observed` and a
    chi-square statistic equal to 0.

    This function does not handle masked arrays, because the calculation
    does not make sense with missing values.

    Like stats.chisquare, this function computes a chi-square statistic;
    the convenience this function provides is to figure out the expected
    frequencies and degrees of freedom from the given contingency table.
    If these were already known, and if the Yates' correction was not
    required, one could use stats.chisquare.  That is, if one calls::

        chi2, p, dof, ex = chi2_contingency(obs, correction=False)

    then the following is true::

        (chi2, p) == stats.chisquare(obs.ravel(), f_exp=ex.ravel(),
                                     ddof=obs.size - 1 - dof)

    References
    ----------
    .. [1] http://en.wikipedia.org/wiki/Contingency_table

    Examples
    --------
    A two-way example (2 x 3):

    >>> obs = np.array([[10, 10, 20], [20, 20, 20]])
    >>> chi2_contingency(obs)
    (2.7777777777777777, 0.24935220877729619, 2, array([[ 12.,  12.,  16.],
           [ 18.,  18.,  24.]]))

    A four-way example (2 x 2 x 2 x 2):

    >>> obs = np.array(
    ...     [[[[12, 17],
    ...        [11, 16]],
    ...       [[11, 12],
    ...        [15, 16]]],
    ...      [[[23, 15],
    ...        [30, 22]],
    ...       [[14, 17],
    ...        [15, 16]]]])
    >>> chi2_contingency(obs)
    (8.7584514426741897, 0.64417725029295503, 11,
     array([[[[ 14.15462386,  14.15462386],
              [ 16.49423111,  16.49423111]],
             [[ 11.2461395 ,  11.2461395 ],
              [ 13.10500554,  13.10500554]]],
            [[[ 19.5591166 ,  19.5591166 ],
              [ 22.79202844,  22.79202844]],
             [[ 15.54012004,  15.54012004],
              [ 18.10873492,  18.10873492]]]]))
    """
    observed = np.asarray(observed)
    if np.any(observed < 0):
        raise ValueError("All values in `observed` must be nonnegative.")
    if observed.size == 0:
        raise ValueError("No data; `observed` has size 0.")

    expected = expected_freq(observed)
    if np.any(expected == 0):
        # Include one of the positions where expected is zero in
        # the exception message.
        zeropos = list(np.where(expected == 0)[0])
        raise ValueError("The internally computed table of expected "
                         "frequencies has a zero element at %s." % zeropos)

    # The degrees of freedom
    dof = expected.size - sum(expected.shape) + expected.ndim - 1

    if dof == 0:
        # Degenerate case; this occurs when `observed` is 1D (or, more
        # generally, when it has only one nontrivial dimension).  In this
        # case, we also have observed == expected, so chi2 is 0.
        chi2 = 0.0
        p = 1.0
    else:
        if dof == 1 and correction:
            # Use Yates' correction for continuity.
            chi2 = ((np.abs(observed - expected) - 0.5)**2 / expected).sum()
        else:
            # Regular chi-square--no correction.
            chi2 = ((observed - expected)**2 / expected).sum()
        p = special.chdtrc(dof, chi2)

    return chi2, p, dof, expected
def pvalue(self):
    return chdtrc(self.ndf, self.chi2)
print("Params: ", fitter.params) print("Iterations: ", fitter.niter) print("Function ev: ", fitter.nfev) print("Uncertainties: ", fitter.xerror) print("dof: ", fitter.dof) print("chi^2, rchi2: ", fitter.chi2_min, fitter.rchi2_min) print("stderr: ", fitter.stderr) print("Status: ", fitter.status) print("\n======== Statistics ========") from scipy.stats import chi2 rv = chi2(fitter.dof) print("Three methods to calculate the right tail cumulative probability:") print("1. with gammainc(dof/2,chi2/2): ", 1-gammainc(0.5*fitter.dof, 0.5*fitter.chi2_min)) print("2. with scipy's chdtrc(dof,chi2):", chdtrc(fitter.dof,fitter.chi2_min)) print("3. with scipy's chi2.cdf(chi2): ", 1-rv.cdf(fitter.chi2_min)) print("") xc = fitter.chi2_min print("Threshold chi-squared at alpha=0.05: ", rv.ppf(1-0.05)) print("Threshold chi-squared at alpha=0.01: ", rv.ppf(1-0.01)) f = lambda x: -rv.pdf(x) x_max = fminbound(f,1,200) print("""For %d degrees of freedom, the maximum probability in the distribution is at chi-squared=%g """%(fitter.dof, x_max)) alpha = 0.05 # Select a p-value chi2max = max(3*x_max, fitter.chi2_min)
def chi2sf(x, df):
    return special.chdtrc(df, x)
def _fit(self, dataset1, dataset2):
    self.dataset1 = dataset1
    self.dataset2 = dataset2
    self.masker = self.masker or dataset1.masker
    self.null_distributions_ = {}

    ma_maps1 = self._collect_ma_maps(
        maps_key="ma_maps1",
        coords_key="coordinates1",
        fname_idx=0,
    )
    ma_maps2 = self._collect_ma_maps(
        maps_key="ma_maps2",
        coords_key="coordinates2",
        fname_idx=1,
    )

    # Calculate different count variables
    n_selected = ma_maps1.shape[0]
    n_unselected = ma_maps2.shape[0]
    n_mappables = n_selected + n_unselected

    n_selected_active_voxels = np.sum(ma_maps1, axis=0)
    n_unselected_active_voxels = np.sum(ma_maps2, axis=0)

    # Remove large arrays
    del ma_maps1, ma_maps2

    # Nomenclature for variables below: p = probability,
    # F = feature present, g = given, U = unselected, A = activation.
    # So, e.g., pAgF = p(A|F) = probability of activation
    # in a voxel if we know that the feature is present in a study.
    pF = n_selected / n_mappables
    pA = np.array(
        (n_selected_active_voxels + n_unselected_active_voxels) / n_mappables
    ).squeeze()

    # Conditional probabilities
    pAgF = n_selected_active_voxels / n_selected
    pAgU = n_unselected_active_voxels / n_unselected
    pFgA = pAgF * pF / pA

    # Recompute conditionals with uniform prior
    pAgF_prior = self.prior * pAgF + (1 - self.prior) * pAgU
    pFgA_prior = pAgF * self.prior / pAgF_prior

    # One-way chi-square test for consistency of activation
    pAgF_chi2_vals = one_way(np.squeeze(n_selected_active_voxels), n_selected)
    pAgF_p_vals = special.chdtrc(1, pAgF_chi2_vals)
    pAgF_sign = np.sign(n_selected_active_voxels - np.mean(n_selected_active_voxels))
    pAgF_z = p_to_z(pAgF_p_vals, tail="two") * pAgF_sign

    # Two-way chi-square for specificity of activation
    cells = np.squeeze(
        np.array(
            [
                [n_selected_active_voxels, n_unselected_active_voxels],
                [
                    n_selected - n_selected_active_voxels,
                    n_unselected - n_unselected_active_voxels,
                ],
            ]
        ).T
    )
    pFgA_chi2_vals = two_way(cells)
    pFgA_p_vals = special.chdtrc(1, pFgA_chi2_vals)
    pFgA_p_vals[pFgA_p_vals < 1e-240] = 1e-240
    pFgA_sign = np.sign(pAgF - pAgU).ravel()
    pFgA_z = p_to_z(pFgA_p_vals, tail="two") * pFgA_sign

    images = {
        "prob_desc-A": pA,
        "prob_desc-AgF": pAgF,
        "prob_desc-FgA": pFgA,
        ("prob_desc-AgF_given_pF=%0.2f" % self.prior): pAgF_prior,
        ("prob_desc-FgA_given_pF=%0.2f" % self.prior): pFgA_prior,
        "z_desc-consistency": pAgF_z,
        "z_desc-specificity": pFgA_z,
        "chi2_desc-consistency": pAgF_chi2_vals,
        "chi2_desc-specificity": pFgA_chi2_vals,
        "p_desc-consistency": pAgF_p_vals,
        "p_desc-specificity": pFgA_p_vals,
    }
    return images
def main():
    parser = OptionParser(description='Fitting to noisy data generated by a known function')
    parser.add_option("--npoints", type="int", help="number of data points")
    parser.add_option("--low", type="float", help="smallest data point")
    parser.add_option("--high", type="float", help="highest data point")
    parser.add_option("--sigma", type="float", help="std of noise")
    (options, args) = parser.parse_args()

    pl.figure(1, (8, 3))
    pl.rcParams.update(mplrc.aps['params'])
    ax = pl.subplot(1, 1, 1)
    ax.set_xlabel(r'$L_y$')
    ax.set_ylabel(r'$I_2^{A}/L_y$')
    ax.xaxis.set_minor_locator(AutoMinorLocator(5))
    ax.yaxis.set_minor_locator(AutoMinorLocator(5))

    # Select the fitting form
    choice = 4
    FitFuncs = [FreeLog, FreeLog2, OneLog, HalfLog, HalfLog2, ZeroLog]
    FitEqs = [r'0.5N_G log(L)/L+', r'b \, log(L)/L+', r'log(L)/L+',
              r'0.5log(L)/L+', r'0.5log(\frac{\rho_s}{c}L)/L+', '']
    for i, fit in enumerate(FitEqs):
        if choice == 1:
            FitEqs[i] = r'a+' + fit + r'c(b,\gamma_{free})/L'
        elif choice == 3:
            FitEqs[i] = r'a+' + fit + r'c(\gamma_{free})/L'
        elif choice == 4:
            FitEqs[i] = r'a+' + fit + r'\gamma_{ord}/L'
        else:
            FitEqs[i] = r'a+' + fit + r'd/L'
    FitEqs[1] = FitEqs[1] + r'+d/L^2'
    FitEqs[4] = FitEqs[4] + r'+d/L^2'
    FitFunc = FitFuncs[choice]
    FitEq = FitEqs[choice]

    if choice == 0:
        clab = ['a', 'N_G', 'd']
        guess = np.ones(3)
    elif choice == 1:
        clab = ['a', 'b', r'\gamma_{free}', 'd']
        guess = np.ones(4)
    elif choice == 3:
        clab = ['a', r'\gamma_{free}']
        guess = np.ones(2)
    elif choice == 4:
        clab = ['a', r'\gamma_{ord}', 'd']
        guess = np.ones(3)
    else:
        clab = ['a', 'c']
        guess = np.ones(2)

    ax.set_title(r'$Fit\, to\, the\, form:\, %s$' % FitEq)
    pl.connect('key_press_event', kevent.press)
    sigma = options.sigma

    gammae_exp = []
    gammav_exp = []

    def fit_and_plot(Ls, MIs, dMIs, data_label, color):
        # Fit FitFunc to one data set, report the fit quality, and plot it.
        coeff, var_matrix = curve_fit(FitFunc, Ls, MIs, guess, sigma=1.0/(dMIs**2))
        err = np.sqrt(np.diagonal(var_matrix))
        dof = len(Ls) - len(coeff)
        chisq = sum(((MIs - FitFunc(Ls, *coeff))/dMIs)**2)
        cdf = special.chdtrc(dof, chisq)
        if choice == 4:
            gammav_exp.append(coeff[-2])
            gammae_exp.append(err[-2])
        lab = 'Fit: '
        for i in range(len(guess)):
            lab += r'$%s = %0.3f(%0.3f);\,$' % (clab[i], coeff[i], err[i])
        lab += r'$\chi^2/DOF=%0.2f;\,$' % (chisq/float(dof))
        lab += r'$\, p-value = %0.2f$' % cdf
        pl.errorbar(Ls, MIs, dMIs, ls='', label=data_label, color=color)
        nLs = np.linspace(Ls[0], Ls[-1], 100)
        pl.plot(nLs, FitFunc(nLs, *coeff), label=lab, color=color)

    # A/(LxLy) = 1/8 --------------------------------------------------------
    Ls = np.array([8, 16, 24, 32])
    MIs = np.array([0.20170987004291954, 0.16195953475521396,
                    0.14235010902049608, 0.13107297667495113])
    dMIs = np.array([0.00019319572927642786, 0.00023402606005673606,
                     0.00024543616376892271, 0.00016942024648351241])
    print(Ls)
    fit_and_plot(Ls, MIs, dMIs, r'$Data\, for\, A/(L_xL_y)=1/8$', fcolors(4))

    # A/(LxLy) = 1/4 --------------------------------------------------------
    off = 1
    Ls = np.array([4, 8, 12, 16, 20, 24, 28, 32])[off:]
    MIs = np.array([0.27140758767953033, 0.22113400442581177, 0.19095315323315157,
                    0.17249654628602423, 0.15946872869885381, 0.14956906776848633,
                    0.14271734664170935, 0.13639189179498032])[off:]
    dMIs = np.array([0.00035063105703284086, 0.00028390662712597328, 0.00030945156199167442,
                     0.00032588203068154991, 0.00021749486902703514, 0.00034962902498649281,
                     0.00033862852457444855, 0.0002417771731823463])[off:]
    fit_and_plot(Ls, MIs, dMIs, r'$Data\, for\, A/(L_xL_y)= 1/4$', fcolors(6))

    # A/(LxLy) = 3/8 --------------------------------------------------------
    Ls = np.array([8, 16, 24, 32])
    MIs = np.array([0.22996345403546045, 0.17674766273907488,
                    0.15299083379548556, 0.13883803379406248])
    dMIs = np.array([0.00035023175479636717, 0.00039822181187702241,
                     0.00037804621953715169, 0.00029383814706680241])
    fit_and_plot(Ls, MIs, dMIs, r'$Data\, for\, A/(L_xL_y)=3/8$', fcolors(3))

    # A/(LxLy) = 1/2 --------------------------------------------------------
    Ls = np.array([8, 10, 12, 16, 20, 24, 28, 32])
    MIs = np.array([0.23251280150358983, 0.21337256591279627, 0.19797531796757287,
                    0.17775003649902879, 0.16411815945015565, 0.15374365325070799,
                    0.14583169547075336, 0.13931301552362246])
    dMIs = np.array([0.00040395552486499571, 0.00062070973509060503, 0.00043835040610511769,
                     0.00046104739832369675, 0.00030739428405506897, 0.00043982489905168917,
                     0.00047582622823638226, 0.00033882833155090007])
    fit_and_plot(Ls, MIs, dMIs, r'$Data\, for\, A/(L_xL_y)=0.50$', fcolors(2))

    pl.tight_layout()
    lgd = pl.legend()
    lgd.draw_frame(False)

    if choice == 4:
        geom_exp = [0.125, 0.250, 0.375, 0.500]
        geom_thr = [0.100, 0.125, 0.1500, 0.2000, 0.2500, 0.3000,
                    0.3500, 0.3750, 0.4000, 0.4500, 0.5000]
        gamma_thr = [0.598, 0.672, 0.7255, 0.8008, 0.8512, 0.8866,
                     0.9116, 0.9209, 0.9283, 0.9379, 0.9410]
        print(np.array(gamma_thr) - 0.5*np.log(2.0*np.pi))

        pl.figure(2, (8, 3))
        pl.connect('key_press_event', kevent.press)
        pl.rcParams.update(mplrc.aps['params'])
        ax = pl.subplot(1, 1, 1)
        ax.set_xlabel(r'$A/(L_xL_y)$')
        ax.set_ylabel(r'$\gamma_{ord}$')
        ax.xaxis.set_minor_locator(AutoMinorLocator(5))
        ax.yaxis.set_minor_locator(AutoMinorLocator(5))
        ax.plot(geom_thr, gamma_thr, color=colors[1], label=r"$Theory$")
        ax.errorbar(geom_exp, gammav_exp, gammae_exp, ls='', color=colors[2], label=r"$MC$")
        lgd = pl.legend()
        lgd.draw_frame(False)
        lgd.draggable(state=True)
        ax.set_xlim([0.1, 0.51])
        pl.tight_layout()
    pl.show()
def test_p_value1(self):
    obs = np.array([[10, 0], [0, 10]])
    ct = stats.ContingencyTable(observed=obs)
    chi2 = ct.chi_square()
    p = special.chdtrc(ct.dof, chi2)
    assert_equal(ct.p_value(), p)
print "Params: ", fitter.params print "Iterations: ", fitter.niter print "Function ev: ", fitter.nfev print "Uncertainties: ", fitter.xerror print "dof: ", fitter.dof print "chi^2, rchi2: ", fitter.chi2_min, fitter.rchi2_min print "stderr: ", fitter.stderr print "Status: ", fitter.status print "\n======== Statistics ========" from scipy.stats import chi2 rv = chi2(fitter.dof) print "Three methods to calculate the right tail cumulative probability:" print "1. with gammainc(dof/2,chi2/2): ", 1-gammainc(0.5*fitter.dof, 0.5*fitter.chi2_min) print "2. with scipy's chdtrc(dof,chi2):", chdtrc(fitter.dof,fitter.chi2_min) print "3. with scipy's chi2.cdf(chi2): ", 1-rv.cdf(fitter.chi2_min) print "" xc = fitter.chi2_min print "Threshold chi-squared at alpha=0.05: ", rv.ppf(1-0.05) print "Threshold chi-squared at alpha=0.01: ", rv.ppf(1-0.01) f = lambda x: -rv.pdf(x) x_max = fminbound(f,1,200) print """For %d degrees of freedom, the maximum probability in the distribution is at chi-squared=%g """%(fitter.dof, x_max) alpha = 0.05 # Select a p-value chi2max = max(3*x_max, fitter.chi2_min)
def neurosynth_decode(coordinates, annotations, ids, ids2=None, features=None,
                      frequency_threshold=0.001, prior=0.5, u=0.05,
                      correction='fdr_bh'):
    """
    Perform discrete functional decoding according to Neurosynth's
    meta-analytic method [1]_. This does not employ correlations between
    unthresholded maps, which are the method of choice for decoding within
    Neurosynth and Neurovault. Metadata (i.e., feature labels) for studies
    within the selected sample (`ids`) are compared to the unselected studies
    remaining in the database (`dataset`).

    References
    ----------
    .. [1] Yarkoni, Tal, et al. "Large-scale automated synthesis of human
           functional neuroimaging data." Nature Methods 8.8 (2011): 665.
           https://doi.org/10.1038/nmeth.1635
    """
    id_cols = ['id', 'study_id', 'contrast_id']
    dataset_ids = sorted(list(set(coordinates['id'].values)))
    if ids2 is None:
        unselected = sorted(list(set(dataset_ids) - set(ids)))
    else:
        unselected = ids2[:]

    if features is None:
        features = annotations.columns.values
    features = [f for f in features if f not in id_cols]

    # Binarize with frequency threshold
    features_df = annotations.set_index('id', drop=True)
    features_df = features_df[features].ge(frequency_threshold)

    sel_array = features_df.loc[ids].values
    unsel_array = features_df.loc[unselected].values

    n_selected = len(ids)
    n_unselected = len(unselected)

    n_selected_term = np.sum(sel_array, axis=0)
    n_unselected_term = np.sum(unsel_array, axis=0)

    n_selected_noterm = n_selected - n_selected_term
    n_unselected_noterm = n_unselected - n_unselected_term

    n_term = n_selected_term + n_unselected_term
    n_noterm = n_selected_noterm + n_unselected_noterm

    p_term = n_term / (n_term + n_noterm)

    p_selected_g_term = n_selected_term / n_term
    p_selected_g_noterm = n_selected_noterm / n_noterm

    # Recompute conditions with empirically derived prior (or inputted one)
    if prior is None:
        # if this is used, p_term_g_selected_prior = p_selected (regardless of term)
        prior = p_term

    # Significance testing
    # One-way chi-square test for consistency of term frequency across terms
    chi2_fi = one_way(n_selected_term, n_term)
    p_fi = special.chdtrc(1, chi2_fi)
    sign_fi = np.sign(n_selected_term - np.mean(n_selected_term)).ravel()

    # Two-way chi-square test for specificity of activation
    cells = np.array([
        [n_selected_term, n_selected_noterm],
        [n_unselected_term, n_unselected_noterm]
    ]).T
    chi2_ri = two_way(cells)
    p_ri = special.chdtrc(1, chi2_ri)
    sign_ri = np.sign(p_selected_g_term - p_selected_g_noterm).ravel()

    # Multiple comparisons correction across terms. Separately done for FI and RI.
    if correction is not None:
        _, p_corr_fi, _, _ = multipletests(p_fi, alpha=u, method=correction,
                                           returnsorted=False)
        _, p_corr_ri, _, _ = multipletests(p_ri, alpha=u, method=correction,
                                           returnsorted=False)
    else:
        p_corr_fi = p_fi
        p_corr_ri = p_ri

    # Compute z-values
    z_corr_fi = p_to_z(p_corr_fi, 'two') * sign_fi
    z_corr_ri = p_to_z(p_corr_ri, 'two') * sign_ri

    # Effect size
    # est. prob. of brain state described by term finding activation in ROI
    p_selected_g_term_g_prior = prior * p_selected_g_term + (1 - prior) * p_selected_g_noterm

    # est. prob. of activation in ROI reflecting brain state described by term
    p_term_g_selected_g_prior = p_selected_g_term * prior / p_selected_g_term_g_prior

    arr = np.array([
        p_corr_fi, z_corr_fi, p_selected_g_term_g_prior,
        p_corr_ri, z_corr_ri, p_term_g_selected_g_prior
    ]).T

    out_df = pd.DataFrame(data=arr, index=features,
                          columns=['pForward', 'zForward', 'probForward',
                                   'pReverse', 'zReverse', 'probReverse'])
    out_df.index.name = 'Term'
    return out_df
elif U <= 5 / 6: return (5) else: return (6) if __name__ == '__main__': """ Enunciado: Para verificar que cierto dado no estaba trucado, se registraron 1000 lanzamientos, resultando que el número de veces que el dado arrojó el valor i (i = 1, 2, 3, 4, 5, 6) fue, respectivamente, 158, 172, 164, 181, 160, 165. Aproximar el p-valor de la prueba: “el dado es honesto” a) utilizando un aproximación ji-cuadrada b) realizando una simulación """ pi = [1 / 6, 1 / 6, 1 / 6, 1 / 6, 1 / 6, 1 / 6] Ni = [158, 172, 164, 181, 160, 165] n, k = sum(Ni), len(pi) Iter = 1000 T = chi2_stadistical(pi, Ni, n) chi2 = chdtrc(k - 1, T) sim = simulation_distribution(n, Iter, T, k, pi, func) print("Estadístico: {}".format(T)) print("===============================") print("P-valor con Chi-cuadrado: {}".format(chi2)) reject(chi2) print("===============================") print("P-valor con Simulación: {}".format(sim)) reject(sim)
def fit(self, voxel_thresh=0.01, q=0.05, corr='FWE', n_iters=5000,
        prior=0.5, n_cores=4):
    self.voxel_thresh = voxel_thresh
    self.corr = corr
    self.n_iters = n_iters

    k_est = self.kernel_estimator(self.coordinates, self.mask)
    ma_maps1 = k_est.transform(self.ids, masked=True, **self.kernel_arguments)
    ma_maps2 = k_est.transform(self.ids2, masked=True, **self.kernel_arguments)

    # Calculate different count variables
    eps = np.spacing(1)
    n_selected = len(self.ids)
    n_unselected = len(self.ids2)
    n_mappables = n_selected + n_unselected

    # Transform MA maps to 1d arrays
    ma_maps_all = np.vstack((ma_maps1, ma_maps2))

    n_selected_active_voxels = np.sum(ma_maps1, axis=0)
    n_unselected_active_voxels = np.sum(ma_maps2, axis=0)

    # Nomenclature for variables below: p = probability,
    # F = feature present, g = given, U = unselected, A = activation.
    # So, e.g., pAgF = p(A|F) = probability of activation
    # in a voxel if we know that the feature is present in a study.
    pF = (n_selected * 1.0) / n_mappables
    pA = np.array(np.sum(ma_maps_all, axis=0) / n_mappables).squeeze()

    # Conditional probabilities
    pAgF = n_selected_active_voxels * 1.0 / n_selected
    pAgU = n_unselected_active_voxels * 1.0 / n_unselected
    pFgA = pAgF * pF / pA

    # Recompute conditionals with uniform prior
    pAgF_prior = prior * pAgF + (1 - prior) * pAgU
    pFgA_prior = pAgF * prior / pAgF_prior

    # One-way chi-square test for consistency of activation
    pAgF_chi2_vals = self._one_way(np.squeeze(n_selected_active_voxels), n_selected)
    pAgF_p_vals = special.chdtrc(1, pAgF_chi2_vals)
    pAgF_sign = np.sign(n_selected_active_voxels - np.mean(n_selected_active_voxels))
    pAgF_z = p_to_z(pAgF_p_vals, tail='two') * pAgF_sign

    # Two-way chi-square for specificity of activation
    cells = np.squeeze(
        np.array([[n_selected_active_voxels, n_unselected_active_voxels],
                  [n_selected - n_selected_active_voxels,
                   n_unselected - n_unselected_active_voxels]]).T)
    pFgA_chi2_vals = self._two_way(cells)
    pFgA_p_vals = special.chdtrc(1, pFgA_chi2_vals)
    pFgA_p_vals[pFgA_p_vals < 1e-240] = 1e-240
    pFgA_sign = np.sign(pAgF - pAgU).ravel()
    pFgA_z = p_to_z(pFgA_p_vals, tail='two') * pFgA_sign

    images = {
        'pA': pA,
        'pAgF': pAgF,
        'pFgA': pFgA,
        ('pAgF_given_pF=%0.2f' % prior): pAgF_prior,
        ('pFgA_given_pF=%0.2f' % prior): pFgA_prior,
        'consistency_z': pAgF_z,
        'specificity_z': pFgA_z,
        'consistency_chi2': pAgF_chi2_vals,
        'specificity_chi2': pFgA_chi2_vals}

    if corr == 'FWE':
        pool = mp.Pool(n_cores)
        iter_dfs = [self.coordinates.copy()] * n_iters
        null_ijk = np.vstack(np.where(self.mask.get_data())).T
        rand_idx = np.random.choice(null_ijk.shape[0],
                                    size=(self.coordinates.shape[0], n_iters))
        rand_ijk = null_ijk[rand_idx, :]
        iter_ijks = np.split(rand_ijk, rand_ijk.shape[1], axis=1)
        params = zip(iter_dfs, iter_ijks, range(n_iters))
        perm_results = pool.map(self._perm, params)
        pool.close()
        pAgF_null_chi2_dist, pFgA_null_chi2_dist = zip(*perm_results)

        # pAgF_FWE
        pAgF_null_chi2_dist = np.squeeze(pAgF_null_chi2_dist)
        np.savetxt('null_dist.txt', pAgF_null_chi2_dist)
        pAgF_p_FWE = np.empty_like(pAgF_chi2_vals).astype(float)
        for voxel in range(pFgA_chi2_vals.shape[0]):
            pAgF_p_FWE[voxel] = null_to_p(pAgF_chi2_vals[voxel],
                                          pAgF_null_chi2_dist, tail='upper')
        # Crop p-values of 0 or 1 to nearest values that won't evaluate to
        # 0 or 1. Prevents inf z-values.
        pAgF_p_FWE[pAgF_p_FWE < eps] = eps
        pAgF_p_FWE[pAgF_p_FWE > (1. - eps)] = 1. - eps
        pAgF_z_FWE = p_to_z(pAgF_p_FWE, tail='two') * pAgF_sign
        images['consistency_p_FWE'] = pAgF_p_FWE
        images['consistency_z_FWE'] = pAgF_z_FWE

        # pFgA_FWE
        pFgA_null_chi2_dist = np.squeeze(pFgA_null_chi2_dist)
        pFgA_p_FWE = np.empty_like(pFgA_chi2_vals).astype(float)
        for voxel in range(pFgA_chi2_vals.shape[0]):
            pFgA_p_FWE[voxel] = null_to_p(pFgA_chi2_vals[voxel],
                                          pFgA_null_chi2_dist, tail='upper')
        # Crop p-values of 0 or 1 to nearest values that won't evaluate to
        # 0 or 1. Prevents inf z-values.
        pFgA_p_FWE[pFgA_p_FWE < eps] = eps
        pFgA_p_FWE[pFgA_p_FWE > (1. - eps)] = 1. - eps
        pFgA_z_FWE = p_to_z(pFgA_p_FWE, tail='two') * pFgA_sign
        images['specificity_p_FWE'] = pFgA_p_FWE
        images['specificity_z_FWE'] = pFgA_z_FWE
    elif corr == 'FDR':
        _, pAgF_p_FDR, _, _ = multipletests(pAgF_p_vals, alpha=0.05,
                                            method='fdr_bh', is_sorted=False,
                                            returnsorted=False)
        pAgF_z_FDR = p_to_z(pAgF_p_FDR, tail='two') * pAgF_sign
        images['consistency_z_FDR'] = pAgF_z_FDR

        _, pFgA_p_FDR, _, _ = multipletests(pFgA_p_vals, alpha=0.05,
                                            method='fdr_bh', is_sorted=False,
                                            returnsorted=False)
        pFgA_z_FDR = p_to_z(pFgA_p_FDR, tail='two') * pFgA_sign
        images['specificity_z_FDR'] = pFgA_z_FDR

    self.results = MetaResult(mask=self.mask, **images)
"""
To verify the theory, a sample of 564 peas was studied; 141 of them produced
white flowers, 291 pink flowers, and 132 red flowers. Approximate the
p-value of this sample: a) using a chi-squared approximation, b) by running
a simulation.
"""
# a)
probs = [1 / 4, 1 / 2, 1 / 4]
frecuency = [141, 291, 132]
n = sum(frecuency)
T = stadistic(probs, frecuency, n)
k = 3
print("a)")
print("==========")
print("Statistic: {}".format(T))
# Use the chi-squared distribution with k-1 degrees of freedom
p_value = chdtrc(k - 1, T)
print("Chi-squared p-value: {}".format(p_value))
rejection(p_value)

# b)
print("b)")
print("==========")
ITER = 1000
p_value_simulation = simulation(n, ITER, T, k, probs, F)
print("Simulation p-value: {}".format(p_value_simulation))
rejection(p_value_simulation)
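Here `stadistic`, `rejection`, `simulation`, and `F` are helpers defined elsewhere in the source. The statistic being computed is Pearson's T = Σ_i (N_i − n·p_i)² / (n·p_i); a sketch under that assumption:

def stadistic_sketch(probs, freqs, n):
    # Hypothetical stand-in for `stadistic`: Pearson's chi-squared statistic.
    return sum((obs - n * p) ** 2 / (n * p) for p, obs in zip(probs, freqs))

# For the pea data the expected counts are (141, 282, 141), giving
# T = 0 + 81/282 + 81/141 ~= 0.86 and chdtrc(2, T) ~= 0.65, so the
# 1:2:1 hypothesis is not rejected.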
def brainmap_decode(coordinates, annotations, ids, ids2=None, features=None,
                    frequency_threshold=0.001, u=0.05, correction='fdr_bh'):
    """
    Perform image-to-text decoding for discrete image inputs (e.g., regions
    of interest, significant clusters) according to the BrainMap method [1]_.

    References
    ----------
    .. [1] Amft, Maren, et al. "Definition and characterization of an
        extended social-affective default network." Brain Structure and
        Function 220.2 (2015): 1031-1049.
        https://doi.org/10.1007/s00429-013-0698-0
    """
    id_cols = ['id', 'study_id', 'contrast_id']
    dataset_ids = sorted(list(set(coordinates['id'].values)))
    if ids2 is None:
        unselected = sorted(list(set(dataset_ids) - set(ids)))
    else:
        unselected = ids2[:]

    if features is None:
        features = annotations.columns.values
    features = [f for f in features if f not in id_cols]

    # Binarize with frequency threshold
    features_df = annotations.set_index('id', drop=True)
    features_df = features_df[features].ge(frequency_threshold)

    sel_array = features_df.loc[ids].values
    unsel_array = features_df.loc[unselected].values

    n_selected = len(ids)
    n_unselected = len(unselected)

    # The number of times any term is used (e.g., if one experiment uses
    # two terms, that counts twice).
    n_exps_across_terms = np.sum(np.sum(features_df))

    n_selected_term = np.sum(sel_array, axis=0)
    n_unselected_term = np.sum(unsel_array, axis=0)

    n_selected_noterm = n_selected - n_selected_term
    n_unselected_noterm = n_unselected - n_unselected_term

    n_term = n_selected_term + n_unselected_term
    p_term = n_term / n_exps_across_terms

    n_foci_in_database = coordinates.shape[0]
    p_selected = n_selected / n_foci_in_database

    # Count foci belonging to studies with and without each term
    n_term_foci = np.zeros(len(features))
    n_noterm_foci = np.zeros(len(features))
    for i, term in enumerate(features):
        term_ids = features_df.loc[features_df[term] == 1].index.values
        noterm_ids = features_df.loc[features_df[term] == 0].index.values
        n_term_foci[i] = coordinates['id'].isin(term_ids).sum()
        n_noterm_foci[i] = coordinates['id'].isin(noterm_ids).sum()

    p_selected_g_term = n_selected_term / n_term_foci  # probForward
    l_selected_g_term = p_selected_g_term / p_selected  # likelihoodForward
    p_selected_g_noterm = n_selected_noterm / n_noterm_foci

    p_term_g_selected = p_selected_g_term * p_term / p_selected  # probReverse
    p_term_g_selected = p_term_g_selected / np.sum(p_term_g_selected)  # Normalize

    # Significance testing
    # Forward inference significance is determined with a binomial distribution
    p_fi = 1 - binom.cdf(k=n_selected_term, n=n_term_foci, p=p_selected)
    sign_fi = np.sign(n_selected_term - np.mean(n_selected_term)).ravel()

    # Two-way chi-square test for specificity of activation
    cells = np.array([
        [n_selected_term, n_selected_noterm],
        [n_unselected_term, n_unselected_noterm]]).T
    chi2_ri = two_way(cells)
    p_ri = special.chdtrc(1, chi2_ri)
    sign_ri = np.sign(p_selected_g_term - p_selected_g_noterm).ravel()

    # Ignore rare features
    p_fi[n_selected_term < 5] = 1.
    p_ri[n_selected_term < 5] = 1.

    # Multiple comparisons correction across features, done separately for
    # forward inference (FI) and reverse inference (RI).
    if correction is not None:
        _, p_corr_fi, _, _ = multipletests(p_fi, alpha=u, method=correction,
                                           returnsorted=False)
        _, p_corr_ri, _, _ = multipletests(p_ri, alpha=u, method=correction,
                                           returnsorted=False)
    else:
        p_corr_fi = p_fi
        p_corr_ri = p_ri

    # Compute z-values
    z_corr_fi = p_to_z(p_corr_fi, 'two') * sign_fi
    z_corr_ri = p_to_z(p_corr_ri, 'two') * sign_ri

    # Effect size
    arr = np.array([
        p_corr_fi, z_corr_fi, l_selected_g_term,
        p_corr_ri, z_corr_ri, p_term_g_selected]).T

    out_df = pd.DataFrame(data=arr, index=features,
                          columns=['pForward', 'zForward',
                                   'likelihoodForward', 'pReverse',
                                   'zReverse', 'probReverse'])
    out_df.index.name = 'Term'
    return out_df
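A usage sketch for `brainmap_decode`. The toy DataFrames below are made up, and the helpers it calls (`two_way`, `p_to_z`, `multipletests`, `binom`, `special`) must be importable as in the surrounding module:

import pandas as pd

# Hypothetical inputs: foci for six studies and two term features.
coordinates = pd.DataFrame({
    'id': ['s1', 's1', 's2', 's3', 's4', 's5', 's6'],
    'x': [0, 10, -4, 8, 22, -30, 5],
    'y': [0, -2, 16, 40, -8, 12, 5],
    'z': [0, 6, -10, 2, 30, -6, 5]})
annotations = pd.DataFrame({
    'id': ['s1', 's2', 's3', 's4', 's5', 's6'],
    'memory': [0.01, 0.0, 0.02, 0.0, 0.0, 0.005],
    'emotion': [0.0, 0.03, 0.0, 0.01, 0.0, 0.0]})

# Decode the "selected" studies against the rest of the dataset.
decoded = brainmap_decode(coordinates, annotations, ids=['s1', 's3'])
print(decoded[['zForward', 'zReverse']])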
dx_star = (x_sigma * numpy.sqrt(
    ((y_sigma * delta)**2) /
    ((y_sigma * delta)**2 + (x_sigma * epsilon)**2)))
dy_star = (y_sigma * numpy.sqrt(
    ((x_sigma * epsilon)**2) /
    ((y_sigma * delta)**2 + (x_sigma * epsilon)**2)))
sigma_odr = numpy.sqrt(dx_star**2 + dy_star**2)
# residual is positive if the point lies above the fitted curve,
# negative if below
residual = (numpy.sign(y_data - func(x_data, *p)) *
            numpy.sqrt(delta**2 + epsilon**2))

# Print quasi-chi-squared and associated quasi-CDF
# WARNING: This CDF is not valid for large x uncertainties!
from scipy.special import chdtrc  # chi-squared survival function
print(("\n Quasi Chi-Squared/dof = {0:10.5f}," +
       " Quasi CDF = {1:10.5f}%").format(
           quasi_chisq, 100. * float(chdtrc(dof, dof * quasi_chisq))))

#################################################################
#### Monte Carlo estimation of uncertainties
#################################################################
print("\n**** Running Monte Carlo CDF Estimator ****")
# Initialize Monte Carlo output distributions
p_dist_Data, p_dist_Model = [], []
quasi_chisq_dist = []
# Initialize timing measurement
import time
start_time = time.perf_counter()  # time.clock() was removed in Python 3.8
for i in range(Number_of_MC_iterations):
    # Starting with the x and x uncertainty (x_sigma) values from
    # the data, calculate Monte Carlo values assuming an uncertainty
    # distribution defined above by smear, e.g. gaussian, uniform, …
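The loop body continuing in the original script perturbs the data with a `smear` routine chosen earlier (gaussian, uniform, ...); the routine itself is outside this excerpt. A plausible gaussian version, under that assumption:

import numpy

def smear_gaussian(values, sigmas):
    # Hypothetical smear: draw one Monte Carlo replica of `values`,
    # each element perturbed by its own gaussian uncertainty.
    return numpy.random.normal(values, sigmas)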
def nonLinFit(func, x_data, y_data, y_sigma=None, p_guess=None,
              plotting=False, verbose=True):
    """
    Non-linear fit function.

    :Parameters:
        func: callable
            The model function, func(x, ...). It must take the independent
            variable as the first argument and the parameters to fit as
            separate remaining arguments.
        x_data: array
            An N-length sequence or a (k,N)-shaped array for functions with
            k predictors. The independent variable where the data is
            measured.
        y_data: array
            N-length sequence; the dependent data, nominally
            func(xdata, ...).
        p_guess: array, optional (default=None)
            None, scalar, or M-length sequence. Initial guess for the
            parameters. If None, the initial values will all be 1 (if the
            number of parameters for the function can be determined using
            introspection; otherwise a ValueError is raised).
        y_sigma: array, optional (default=None)
            None or N-length sequence. If not None, it represents the
            standard deviation of y_data. This vector, if given, will be
            used as weights in the least-squares problem. If None, the
            weights are set to unity.
        verbose: boolean, optional (default=True)
            In verbose mode, print out
            * the optimal values for the parameters so that the sum of the
              squared error of ``func(xdata, *popt) - ydata`` is minimized,
              and their uncertainties
            * the estimated covariance and correlation matrices; the
              diagonal provides the variance of the parameter estimates
            * Chi-Squared
            * DOF: degrees of freedom
            * CDF: cumulative distribution function
        plotting: boolean, optional (default=False)
            Whether to plot the results; it overplots:
            * the guess function,
            * the measured values (including error bars),
            * the final fit,
            * the residuals with error bars.

    :Returns:
        fit: array
            the computed fit function
        popt: array
            the optimal values for the parameters so that the sum of the
            squared error of ``func(xdata, *popt) - ydata`` is minimized
        perr: array
            the uncertainties of the fitted parameters

    :Raises:
        TypeError if the fit is bad
    """
    # Note: despite the docstring, p_guess is required here because it is
    # used to draw the initial-guess curve before fitting.
    x_func = np.linspace(min(x_data), max(x_data))
    initial_plot = func(x_func, *p_guess)
    try:
        # Note: maxfev is the maximum number of func evaluations tried; you
        # can try increasing this value if the fit fails.
        p, cov = optimize.curve_fit(func, x_data, y_data, p0=p_guess,
                                    sigma=y_sigma,
                                    maxfev=100 * (len(x_data) + 1))
    except Exception:
        print("scipy.optimize.curve_fit failed")
        p, cov = p_guess, None

    y_fit = func(x_data, *p)
    y_residual = y_data - y_fit
    perr = np.zeros(len(p))

    # Calculate degrees of freedom of fit
    dof = len(x_data) - len(p)

    ## Output results
    if verbose:
        print("\nNumber of Data Points = %7g, Number of Parameters = %1g"
              % (len(x_data), len(p)))
        print("Covariance Matrix : \n", cov, "\n")
    if y_sigma is None:
        y_sigma = 1
    try:
        if verbose:
            print("Correlation Matrix :")
        for i, row in enumerate(cov):
            for j in range(len(p)):
                if verbose:
                    print("%10f" % (cov[i, j] /
                                    np.sqrt(cov[i, i] * cov[j, j])),
                          end=' ')
            if verbose:
                print()
        # Calculate Chi-squared
        chisq = sum(((y_data - func(x_data, *p)) / y_sigma) ** 2)
        # Calculate parameter errors
        for i in range(len(p)):
            perr[i] = cov[i, i] ** 0.5 * max(1, np.sqrt(chisq / dof))
        if verbose:
            print("\nEstimated parameters and uncertainties"
                  " (with initial guesses)")
            for i in range(len(p)):
                print(" p[%d] = %10.5f +/- %10.5f (%10.5f)"
                      % (i, p[i],
                         cov[i, i] ** 0.5 * max(1, np.sqrt(chisq / dof)),
                         p_guess[i]))
            print("Chi-Squared/dof = %10.5f, CDF = %10.5f%%"
                  % (chisq / dof, 100. * float(special.chdtrc(dof, chisq))))
            if chisq > dof:
                print("\nNOTE: Because Chi-squared > dof, the parameter"
                      " uncertainty")
                print("      estimates have been scaled up by"
                      " sqrt(Chi-squared/dof).")
    # If cov has not been calculated because of a bad fit, the above block
    # will raise a TypeError, which is caught by this try-except structure.
    except TypeError:
        print("**** BAD FIT ****")
        print("Parameters were: ", p)
        # Calculate Chi-squared for current parameters
        chisq = sum(((y_data - func(x_data, *p)) / y_sigma) ** 2)
        print("Chi-Squared/dof for these parameter values = %10.5f,"
              " CDF = %10.5f%%"
              % (chisq / dof, 100. * float(special.chdtrc(dof, chisq))))
        print("Uncertainties not calculated.")
        print()
        print("Try a different initial guess for the fit parameters.")
        print("Or if these parameters appear close to a good fit, try giving")
        print("   the fitting program more time by increasing the value"
              " of maxfev.")
        chisq = None

    if plotting:
        ## Plot
        # Create figure with light gray background
        fig = pyplot.figure(facecolor="0.98")

        # 3 rows, 1 column, subplot 1.
        # 3 rows are declared, but there are only 2 plots; this leaves room
        # for text in the empty 3rd row.
        fit = fig.add_subplot(311)
        # Remove tick labels from upper plot (for clean look)
        fit.set_xticklabels(())

        # Plot data as red circles, and fitted function as (default) line.
        # (The sort is in case the x data are not in sequential order.)
        fit.plot(x_data, y_data, 'ro',
                 np.sort(x_data), func(np.sort(x_data), *p))
        # Draw the starting guess as a dashed green line
        fit.plot(x_func, initial_plot, 'g-', label="Start", linestyle="--")
        # Add error bars on data as red crosses.
        fit.errorbar(x_data, y_data, yerr=y_sigma, fmt='r+')

        # Separate plot to show residuals
        residuals = fig.add_subplot(312)  # 3 rows, 1 column, subplot 2
        residuals.errorbar(x_data, y_residual, yerr=y_sigma, fmt='r+',
                           label="Residuals")
        # Make sure the residual plot has the same x axis as the fit plot
        residuals.set_xlim(fit.get_xlim())
        residuals.axhline(y=0)  # draw horizontal line at 0 on vertical axis

        # These data look better if 'plain', not scientific, notation is
        # used, and if the tick labels are not offset by a constant (as is
        # done by default).
        # Note: This only works for matplotlib version 1.0 and newer, so it
        # is enclosed in a "try" to avoid errors.
        try:
            pyplot.ticklabel_format(style='plain', useOffset=False, axis='x')
        except Exception:
            pass

        # Print selected information in the empty 3rd plot row
        try:
            pyplot.figtext(0.05, 0.25,
                           "Converged with ChiSq = " + str(chisq) +
                           ", DOF = " + str(dof) + ", CDF = " +
                           str(100 * special.chdtrc(dof, chisq)) + "%")
            for i, value in enumerate(p):
                pyplot.figtext(0.08, 0.16 - i * 0.03,
                               "p[" + str(i) + "]" + " = " +
                               str(p[i]).ljust(18) + " +/- " +
                               str(np.sqrt(cov[i, i]) *
                                   max(1, np.sqrt(chisq / dof))),
                               fontdict=None)
            # Note: Including family="Monospace" in the above figtext call
            # will produce nicer looking output, but can cause problems
            # with some older python installations.
        except TypeError:
            pyplot.figtext(0.05, 0.25, "BAD FIT. Guess again.")

        # Display the plot
        pyplot.show()
        # To print the plot, save it first, and then print the saved image.
        # Closing the plot window will end the python script.

    return y_fit, p, perr
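A usage sketch for `nonLinFit`, fitting synthetic exponential-decay data (the model, data, and noise level are made up for illustration):

import numpy as np

def model(x, a, tau):
    # Simple two-parameter exponential decay.
    return a * np.exp(-x / tau)

x = np.linspace(0, 10, 40)
rng = np.random.default_rng(0)
y = model(x, 2.0, 3.0) + rng.normal(0, 0.05, x.size)

y_fit, popt, perr = nonLinFit(model, x, y,
                              y_sigma=np.full(x.size, 0.05),
                              p_guess=(1.0, 1.0), plotting=False)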
def _fit(self, dataset, dataset2):
    self.dataset = dataset
    self.dataset2 = dataset2
    self.mask = dataset.masker.mask_img

    ma_maps1 = self.kernel_estimator.transform(self.dataset, mask=self.mask,
                                               masked=True)
    ma_maps2 = self.kernel_estimator.transform(self.dataset2, mask=self.mask,
                                               masked=True)

    # Calculate different count variables
    n_selected = ma_maps1.shape[0]
    n_unselected = ma_maps2.shape[0]
    n_mappables = n_selected + n_unselected

    # Transform MA maps to 1d arrays
    ma_maps_all = np.vstack((ma_maps1, ma_maps2))

    n_selected_active_voxels = np.sum(ma_maps1, axis=0)
    n_unselected_active_voxels = np.sum(ma_maps2, axis=0)

    # Nomenclature for variables below: p = probability,
    # F = feature present, g = given, U = unselected, A = activation.
    # So, e.g., pAgF = p(A|F) = probability of activation
    # in a voxel if we know that the feature is present in a study.
    pF = (n_selected * 1.0) / n_mappables
    pA = np.array(np.sum(ma_maps_all, axis=0) / n_mappables).squeeze()

    # Conditional probabilities
    pAgF = n_selected_active_voxels * 1.0 / n_selected
    pAgU = n_unselected_active_voxels * 1.0 / n_unselected
    pFgA = pAgF * pF / pA

    # Recompute conditionals with the specified prior
    pAgF_prior = self.prior * pAgF + (1 - self.prior) * pAgU
    pFgA_prior = pAgF * self.prior / pAgF_prior

    # One-way chi-square test for consistency of activation
    pAgF_chi2_vals = one_way(np.squeeze(n_selected_active_voxels),
                             n_selected)
    pAgF_p_vals = special.chdtrc(1, pAgF_chi2_vals)
    pAgF_sign = np.sign(n_selected_active_voxels -
                        np.mean(n_selected_active_voxels))
    pAgF_z = p_to_z(pAgF_p_vals, tail='two') * pAgF_sign

    # Two-way chi-square for specificity of activation
    cells = np.squeeze(
        np.array([[n_selected_active_voxels, n_unselected_active_voxels],
                  [n_selected - n_selected_active_voxels,
                   n_unselected - n_unselected_active_voxels]]).T)
    pFgA_chi2_vals = two_way(cells)
    pFgA_p_vals = special.chdtrc(1, pFgA_chi2_vals)
    pFgA_p_vals[pFgA_p_vals < 1e-240] = 1e-240
    pFgA_sign = np.sign(pAgF - pAgU).ravel()
    pFgA_z = p_to_z(pFgA_p_vals, tail='two') * pFgA_sign

    images = {
        'pA': pA,
        'pAgF': pAgF,
        'pFgA': pFgA,
        ('pAgF_given_pF=%0.2f' % self.prior): pAgF_prior,
        ('pFgA_given_pF=%0.2f' % self.prior): pFgA_prior,
        'consistency_z': pAgF_z,
        'specificity_z': pFgA_z,
        'consistency_chi2': pAgF_chi2_vals,
        'specificity_chi2': pFgA_chi2_vals,
        'consistency_p': pAgF_p_vals,
        'specificity_p': pFgA_p_vals,
    }
    return images
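Here `one_way` and `two_way` are module-level helpers rather than the `self._one_way`/`self._two_way` methods used in the earlier `fit`; their implementations are not part of this excerpt. A sketch of a one-way chi-squared consistent with how it is used above, taking the expected count as the mean across voxels (this matches the sign convention `n_selected_active_voxels - np.mean(...)`, but it is an assumption, not the actual helper):

import numpy as np

def one_way_sketch(counts, n):
    # counts: active-study count per voxel; n: total number of studies.
    # Compare each voxel's (active, inactive) split against the split
    # implied by the mean count across voxels (1 degree of freedom).
    counts = counts.astype(np.float64)
    expected = counts.mean()
    inactive = n - counts
    expected_inactive = n - expected
    return ((counts - expected) ** 2 / expected
            + (inactive - expected_inactive) ** 2 / expected_inactive)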
def main():
    # Define the mapping between short names and label names
    parMap = {'x': r'L_x', 'y': r'L_y', 'b': r'\beta', 'T': r'T', 'r': r'r'}

    parser = argparse.ArgumentParser(
        description='Plot Raw MC Equilibration Data for Scalar Estimators.')
    parser.add_argument('fileNames', help='Scalar estimator files', nargs='+')
    parser.add_argument('--cutoff', action='store_true', default=False,
                        help='Plot Delta as function of the cut-off')
    args = parser.parse_args()

    #rcParams.update(mplrc.aps['params'])
    colors = ["#66CAAE", "#CF6BDD", "#E27844", "#7ACF57", "#92A1D6",
              "#E17597", "#C1B546", 'b']
    if args.cutoff:
        figure(2, (7, 6))
        ax2 = subplot(211)
        connect('key_press_event', kevent.press)
        #ax2.set_xlabel(r'$\beta_{\mathrm{cutoff}}$')
        xticks([], [])
        ax2.set_ylabel(r'$\Delta$')
        ax4 = subplot(212)
        ax4.set_xlabel(r'$\beta_{\mathrm{cutoff}}$')
        ax4.set_ylabel(r'$\Delta$')
        subplots_adjust(hspace=0)

    figure(1, (7, 6))
    connect('key_press_event', kevent.press)
    ax1 = subplot2grid((2, 2), (0, 0), colspan=2)
    ax1.set_xlabel(r'$\beta$')
    ax1.set_ylabel(r'$S_2^l$')

    global L
    global l
    global j_z
    i = 0
    geom = 'ful'
    LDelta = []
    LDeltaE = []
    LJz = []
    doubleJz = 1.0
    for fileName in args.fileNames:
        #print(fileName)
        scalarhelp = ssexyhelp.ScalarReduce(fileName)
        parmap = scalarhelp.getParmap()
        dA = int(parmap['dA'])
        Lx = int(parmap['Lx'])
        if 'delta' in parmap.keys():
            j_z = float(parmap['delta']) * doubleJz
            delta_fit = fDeltaJz(j_z)
        else:
            delta_fit = .5
        LJz += [j_z]
        L = Lx
        l = dA

        scalarhelp.loadData()
        rb, Beta = scalarhelp.getrParams()
        if 'ALRatio' in scalarhelp.headers:
            Zr, dZr = scalarhelp.getAverages('ALRatio')
            Zr = unumpy.uarray(Zr, dZr)
        elif 'nAred' in scalarhelp.headers:
            Ar, dAr = scalarhelp.getAverages('nAred')
            Ar = unumpy.uarray(Ar, dAr)
            Ae, dAe = scalarhelp.getAverages('nAext')
            Ae = unumpy.uarray(Ae, dAe)
            Zr = Ae / Ar
        S2A = -unumpy.log(Zr)
        S2n = unumpy.nominal_values(S2A)
        S2d = unumpy.std_devs(S2A)

        #ax1.plot([Beta[0], Beta[-1]], [RenyiZero(), RenyiZero()],
        #         color='black', label=r'$S_2(T=0)$')
        mins = S2n - S2d
        maxs = S2n + S2d
        fill_between(Beta, mins, maxs, facecolor=colors[i % len(colors)],
                     alpha=0.25, edgecolor='None')
        ax1.plot(Beta[0], S2n[0], color=colors[i % len(colors)],
                 linewidth=4)  #, label=r"$Data \, L=%2d$" % Lx

        (A, B, C) = (0.4, 1, 0.4)
        #flshift = np.max(np.where(Beta < 1.14))
        minB = CutOff(j_z)
        if (Beta[0] > Lx) and (j_z > 0):
            flshift = 0  # np.max(np.where(Beta < Lx))
        else:
            if Beta[0] < minB:
                flshift = np.max(np.where(Beta < minB))
            else:
                flshift = 0
        fhshift = len(Beta)  # np.min(np.where(Beta > 245))
        ZBeta = np.linspace(Beta[flshift], Beta[fhshift - 1], 1000)

        #coeff, var_matrix = curve_fit(RenyiCorrection_C,
        #                              np.array(Beta)[flshift:fhshift],
        #                              S2n[flshift:fhshift], p0=(C))
        #(C) = coeff
        #errs = np.sqrt(var_matrix.diagonal())
        #S2pred = RenyiCorrection_C(ZBeta, C)
        #ax1.plot(ZBeta, S2pred, color=colors[i % len(colors)],
        #         linewidth=1.5,
        #         label=r"$\mathrm{L=%2d \, J_z=%0.2f }$" % (Lx, j_z))
        #coeff, var_matrix = curve_fit(RenyiCorrection_DeltaC,
        #                              np.array(Beta)[flshift:fhshift],
        #                              S2n[flshift:fhshift],
        #                              p0=(delta_fit, C))
        #(Delta, C) = coeff
        #errs = np.sqrt(var_matrix.diagonal())
        #print('Length: %3d J_z: %4.3f Theory: %4.3f Fit: %4.3f +/- %4.3f'
        #      % (L, j_z, delta_fit, Delta, errs[0]))
        #LDelta += [Delta]
        #LDeltaE += [errs[0]]
        #S2pred = RenyiCorrection_DeltaC(ZBeta, Delta, C)
        #ax1.plot(ZBeta, S2pred, color=colors[i % len(colors)],
        #         linewidth=1.5,
        #         label=r"$\mathrm{L=%2d \, J_z=%0.2f }$" % (Lx, j_z))

        if args.cutoff:
            #minB = 30
            flshift2 = flshift
            if Beta[0] < minB:
                flshift = np.max(np.where(Beta < minB))
            else:
                flshift = 0
            leg = False
            ax2.plot([Beta[0], Beta[flshift]],
                     [fDeltaJz(j_z), fDeltaJz(j_z)],
                     ls='-', color=colors[i % len(colors)], linewidth=1.5)
            Bs = []
            mins = []
            maxs = []
            chisqs = []
            cdfs = []
            Bs2 = []
            # Scan the lower cut-off and refit at each choice
            for shift in range(flshift):
                ZBeta = np.linspace(Beta[shift], Beta[fhshift - 1], 1000)
                coeff, var_matrix = curve_fit(
                    RenyiCorrection_DeltaC,
                    np.array(Beta)[shift:fhshift],
                    S2n[shift:fhshift], p0=(delta_fit, C))
                (Delta, C) = coeff
                if type(var_matrix) is float:
                    break
                errs = np.sqrt(var_matrix.diagonal())
                if shift != flshift2:
                    if not leg:
                        ax2.errorbar(Beta[shift], Delta, errs[0],
                                     color=colors[i % len(colors)],
                                     label=r"$\mathrm{L=%2d \, J_z=%0.2f }$"
                                           % (Lx, j_z))
                        leg = True
                    else:
                        mins += [Delta - errs[0]]
                        maxs += [Delta + errs[0]]
                        Bs += [Beta[shift]]
                        ax2.errorbar(Beta[shift], Delta, errs[0],
                                     color=colors[i % len(colors)])
                else:
                    ax2.errorbar(Beta[shift], Delta, errs[0], color='black')
                Bs2 += [Beta[shift]]
                dof = len(np.array(Beta)[shift:fhshift]) - len(coeff)
                chisqs += [sum(((S2n[shift:fhshift] -
                                 RenyiCorrection_DeltaC(
                                     np.array(Beta)[shift:fhshift],
                                     Delta, C)) /
                                np.array(S2d)[shift:fhshift]) ** 2)]
                cdfs += [special.chdtrc(dof, chisqs[-1])]
            ax2.fill_between(Bs, mins, maxs,
                             facecolor=colors[i % len(colors)],
                             alpha=0.25, edgecolor='None')
            #ax4.plot(Bs2, chisqs, color=colors[i % len(colors)])
            ax4.plot(Bs2, cdfs, color=colors[i % len(colors)])
        i += 1

    ax1.set_title(r'$A_{\Delta} e^{-\beta*B_{\Delta}}-C$')
    #lg = ax1.legend(loc='best', ncol=2, frameon=False)
    #lg.draggable(state=True)
    if args.cutoff:
        lg = ax2.legend(loc='best', ncol=2, frameon=False)
        lg.draggable(state=True)
        ax2.set_ylim([0, 1])
        ax4.set_ylim([0, 1])

    #ax3.set_xlim([-1.05, 1.05])
    ax3 = subplot2grid((2, 2), (1, 0), colspan=2)
    ax3.set_xlabel(r'$J_z$')
    ax3.set_ylabel(r'$\Delta$')
    ax3.set_ylim([0, 0.5])
    ax3.set_xlim([-1.05, 1.05])
    #LJz = np.arccos(np.array(LJz))
    #print(LJz)
    if len(LJz) == len(LDelta):
        ax3.errorbar(np.array(LJz), LDelta, LDeltaE, ls='', marker='.',
                     color=colors[2], mec=colors[2], mfc='white',
                     label=r"$\mathrm{QMC}$")
    Jzs = np.linspace(-1.0, 1.0, 5000)
    deltas = fDeltaJz(Jzs)
    #Jzs = np.arccos(Jzs)
    ax3.plot(Jzs * 1.0, deltas, color=colors[0], linewidth=1.5,
             label=r"$\mathrm{Theory}$")
    lg = ax3.legend(loc='best', ncol=1, frameon=False)
    lg.draggable(state=True)
    tight_layout()
    show()
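Several snippets in this collection repeat the same goodness-of-fit probe: fit with curve_fit, form the weighted chi-squared, and convert it to a p-value with special.chdtrc. A condensed sketch of that pattern (the helper name `fit_pvalue` is made up):

import numpy as np
from scipy import special
from scipy.optimize import curve_fit

def fit_pvalue(model, x, y, dy, p0):
    # Fit `model`, then report parameters, their 1-sigma errors, and the
    # chi-squared survival probability P(chi2_dof >= chisq).
    coeff, cov = curve_fit(model, x, y, sigma=dy, p0=p0)
    chisq = np.sum(((y - model(x, *coeff)) / dy) ** 2)
    dof = len(x) - len(coeff)
    return coeff, np.sqrt(np.diag(cov)), special.chdtrc(dof, chisq)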