def __init__(self, dataset, ids, ids2=None, q=0.01, prior=0.5, min_studies=1):
    """ Initialize a new MetaAnalysis instance and run an analysis.

    Args:
        dataset: A Dataset instance.
        ids: A list of Mappable IDs to include in the meta-analysis.
        ids2: Optional second list of Mappable IDs. If passed, the set of
            studies will be restricted to the union of ids and ids2 before
            performing the meta-analysis. This is useful for meta-analytic
            contrasts, as the resulting images will in effect identify
            regions that are reported/activated more frequently in one
            list than in the other.
        q: The FDR threshold to use when correcting for multiple
            comparisons. Set to .01 by default.
        prior: The prior to use when calculating conditional
            probabilities. This is the prior probability of a feature
            being used in a study (i.e., p(F)). For example, if set to
            0.25, the analysis will assume that 1/4 of studies load on the
            target feature, as opposed to the empirically estimated p(F),
            which is len(ids) / total number of studies in the dataset.
            If prior is not passed, defaults to 0.5, reflecting an effort
            to put all terms on level footing and avoid undue influence of
            base rates (because some terms are much more common than
            others). Note that modifying the prior will only affect the
            effect size/probability maps, and not the statistical
            inference (z-score) maps.
        min_studies: Integer or float indicating which voxels to mask out
            from results due to lack of stability. If an integer is
            passed, all voxels that activate in fewer than this number of
            studies will be ignored (i.e., a value of 0 will be assigned
            in all output images). If a float in the range of 0 - 1 is
            passed, this will be interpreted as a proportion to use as the
            cut-off (e.g., passing 0.03 will exclude all voxels active in
            fewer than 3% of the entire dataset). Defaults to 1, meaning
            all voxels that activate in at least one study will be kept.

    Sets self.dataset, self.selected_ids, self.selected_id_indices and
    self.images (a dict mapping image names to the computed arrays).
    """
    # Local import: only needed for the min_studies type check below.
    import numbers

    self.dataset = dataset
    mt = dataset.image_table
    self.selected_ids = list(set(mt.ids) & set(ids))
    self.selected_id_indices = np.in1d(mt.ids, ids)

    # If ids2 is provided, we only use mappables explicitly in either ids
    # or ids2. Otherwise, all mappables not in the ids list are used as
    # the control condition.
    # Identity test on purpose: `ids2 == None` is evaluated element-wise
    # when ids2 is a NumPy array and cannot be used as a condition.
    if ids2 is None:
        unselected_id_indices = ~self.selected_id_indices
    else:
        unselected_id_indices = np.in1d(mt.ids, ids2)

    # Calculate different count variables
    logger.debug("Calculating counts...")
    n_selected = len(self.selected_ids)
    n_unselected = np.sum(unselected_id_indices)
    n_mappables = n_selected + n_unselected

    n_selected_active_voxels = mt.data.dot(self.selected_id_indices)
    n_unselected_active_voxels = mt.data.dot(unselected_id_indices)

    # Nomenclature for variables below: p = probability, F = feature
    # present, g = given, U = unselected, A = activation. So, e.g.,
    # pAgF = p(A|F) = probability of activation in a voxel if we know
    # that the feature is present in a study.
    # NOTE(review): the divisions below are unsmoothed, so an empty
    # selected or unselected set gives a zero denominator -- confirm that
    # callers never pass ids that match nothing.
    pF = (n_selected * 1.0) / n_mappables
    # NOTE(review): pA sums over every column of mt.data, whereas
    # n_mappables only counts selected + unselected studies; the two
    # differ when ids2 restricts the control set -- confirm intended.
    pA = np.array((mt.data.sum(axis=1) * 1.0) / n_mappables).squeeze()

    # Conditional probabilities
    logger.debug("Calculating conditional probabilities...")
    pAgF = n_selected_active_voxels * 1.0 / n_selected
    pAgU = n_unselected_active_voxels * 1.0 / n_unselected
    pFgA = pAgF * pF / pA

    # Recompute conditionals with uniform prior
    logger.debug("Recomputing with uniform priors...")
    pAgF_prior = prior * pAgF + (1 - prior) * pAgU
    pFgA_prior = pAgF * prior / pAgF_prior

    def p_to_z(p, sign):
        """ Convert p-values to signed z-scores; `sign` gives the tail. """
        p = p / 2  # convert to two-tailed (new array; input is untouched)
        # prevent underflow
        p[p < 1e-240] = 1e-240
        # Convert to z and assign tail
        z = np.abs(norm.ppf(p)) * sign
        # Set invalid voxels to 0
        z[np.isinf(z)] = 0.0
        return z

    # One-way chi-square test for consistency of activation
    p_vals = stats.one_way(np.squeeze(n_selected_active_voxels), n_selected)
    z_sign = np.sign(
        n_selected_active_voxels - np.mean(n_selected_active_voxels)).ravel()
    pAgF_z = p_to_z(p_vals, z_sign)
    fdr_thresh = stats.fdr(p_vals, q)
    pAgF_z_FDR = imageutils.threshold_img(
        pAgF_z, fdr_thresh, p_vals, mask_out='above')

    # Two-way chi-square for specificity of activation
    cells = np.squeeze(
        np.array([[n_selected_active_voxels, n_unselected_active_voxels],
                  [n_selected - n_selected_active_voxels,
                   n_unselected - n_unselected_active_voxels]]).T)
    p_vals = stats.two_way(cells)
    z_sign = np.sign(pAgF - pAgU).ravel()
    pFgA_z = p_to_z(p_vals, z_sign)
    fdr_thresh = stats.fdr(p_vals, q)
    pFgA_z_FDR = imageutils.threshold_img(
        pFgA_z, fdr_thresh, p_vals, mask_out='above')

    # Retain any images we may want to save or access later
    self.images = {
        'pA': pA,
        'pAgF': pAgF,
        'pFgA': pFgA,
        ('pAgF_given_pF=%0.2f' % prior): pAgF_prior,
        ('pFgA_given_pF=%0.2f' % prior): pFgA_prior,
        'pAgF_z': pAgF_z,
        'pFgA_z': pFgA_z,
        ('pAgF_z_FDR_%s' % q): pAgF_z_FDR,
        ('pFgA_z_FDR_%s' % q): pFgA_z_FDR
    }

    # Mask out all voxels below num_studies threshold
    if min_studies > 0:
        # numbers.Integral also matches NumPy integer scalars, which are
        # not instances of the built-in int on Python 3.
        if isinstance(min_studies, numbers.Integral):
            # Recalculate as proportion
            min_studies = float(min_studies) / n_mappables
        vox_to_exclude = np.where(pA < min_studies)[0]  # Create mask
        # Mask each image (in place)
        for img in self.images.values():
            img[vox_to_exclude] = 0
def __init__(self, dataset, ids, ids2=None, **kwargs):
    """ Initialize a new MetaAnalysis instance and run an analysis.

    Args:
        dataset: A Dataset instance.
        ids: A list of Mappable IDs to include in the meta-analysis.
        ids2: Optional second list of Mappable IDs. If passed, the set of
            studies will be restricted to the union of ids and ids2 before
            performing the meta-analysis. This is useful for meta-analytic
            contrasts, as the resulting images will in effect identify
            regions that are reported/activated more frequently in one
            list than in the other.
        kwargs: Additional optional arguments. Currently implemented:
            q: The FDR threshold to use when correcting for multiple
                comparisons. Set to .05 by default.
            prior: The prior to use when calculating conditional
                probabilities. This is the prior probability of a feature
                being used in a study (i.e., p(F)). For example, if set to
                0.25, the analysis will assume that 1/4 of studies load on
                the target feature, as opposed to the empirically
                estimated p(F), which is len(ids) / total number of
                studies in the dataset. If prior is not passed, defaults
                to 0.5, reflecting an effort to put all terms on level
                footing and avoid undue influence of base rates (because
                some terms are much more common than others). Note that
                modifying the prior will only affect the effect
                size/probability maps, and not the statistical inference
                (z-score) maps.
            min_studies: Integer or float indicating which voxels to mask
                out from results due to lack of stability. If an integer
                is passed, all voxels that activate in fewer than this
                number of studies will be ignored (i.e., a value of 0 will
                be assigned in all output images). If a float in the range
                of 0 - 1 is passed, this will be interpreted as a
                proportion to use as the cut-off (e.g., passing 0.03 will
                exclude all voxels active in fewer than 3% of the entire
                dataset). Defaults to 0, meaning all voxels will be kept.
            Unrecognized keyword arguments are ignored.

    Sets self.dataset, self.selected_ids, self.selected_id_indices and
    self.images (a dict mapping image names to the computed arrays).
    """
    # Local import: only needed for the min_studies type check below.
    import numbers

    ### Set optional parameter defaults ###
    q = kwargs.get('q', 0.05)  # voxel-wise FDR rate
    prior_pF = kwargs.get('prior', 0.5)  # prior prob. of feature present
    min_studies = kwargs.get('min_studies', 0)  # min. studies per voxel

    self.dataset = dataset
    mt = dataset.image_table
    self.selected_ids = list(set(mt.ids) & set(ids))
    self.selected_id_indices = np.in1d(mt.ids, ids)

    # If ids2 is provided, we only use mappables explicitly in either ids
    # or ids2. Otherwise, all mappables not in the ids list are used as
    # the control condition.
    # Identity test on purpose: `ids2 == None` is evaluated element-wise
    # when ids2 is a NumPy array and cannot be used as a condition.
    if ids2 is None:
        unselected_id_indices = ~self.selected_id_indices
    else:
        unselected_id_indices = np.in1d(mt.ids, ids2)

    # Calculate different count variables
    n_selected = len(self.selected_ids)
    n_unselected = np.sum(unselected_id_indices)
    n_mappables = n_selected + n_unselected

    n_selected_active_voxels = mt.data.dot(self.selected_id_indices)
    n_unselected_active_voxels = mt.data.dot(unselected_id_indices)

    # Nomenclature for variables below: p = probability, F = feature
    # present, g = given, U = unselected, A = activation. So, e.g.,
    # pAgF = p(A|F) = probability of activation in a voxel if we know
    # that the feature is present in a study.
    pF = (n_selected + 1.0) / (n_mappables + 2)
    # NOTE(review): pA sums over every column of mt.data, whereas
    # n_mappables only counts selected + unselected studies; the two
    # differ when ids2 restricts the control set -- confirm intended.
    pA = np.array(
        (mt.data.sum(axis=1) + 1.0) / (n_mappables + 2)).squeeze()

    # Conditional probabilities, with Laplace smoothing
    pAgF = (n_selected_active_voxels + 1.0) / (n_selected + 2)
    pAgU = (n_unselected_active_voxels + 1.0) / (n_unselected + 2)
    pFgA = pAgF * pF / pA

    # Recompute conditionals with uniform prior
    pAgF_prior = prior_pF * pAgF + (1 - prior_pF) * pAgU
    pFgA_prior = pAgF * prior_pF / pAgF_prior

    # One-way chi-square test for consistency of activation
    p_vals = stats.one_way(np.squeeze(n_selected_active_voxels), n_selected)
    # Clamp tiny p-values so norm.ppf does not underflow to -inf
    p_vals[p_vals < 1e-240] = 1e-240
    z_sign = np.sign(
        n_selected_active_voxels - np.mean(n_selected_active_voxels)).ravel()
    pAgF_z = np.abs(norm.ppf(p_vals / 2)) * z_sign
    fdr_thresh = stats.fdr(p_vals, q)
    pAgF_z_FDR = imageutils.threshold_img(
        pAgF_z, fdr_thresh, p_vals, mask_out='above')

    # Two-way chi-square for specificity of activation
    cells = np.squeeze(
        np.array([[n_selected_active_voxels, n_unselected_active_voxels],
                  [n_selected - n_selected_active_voxels,
                   n_unselected - n_unselected_active_voxels]]).T)
    p_vals = stats.two_way(cells)
    # Clamp tiny p-values so norm.ppf does not underflow to -inf
    p_vals[p_vals < 1e-240] = 1e-240
    z_sign = np.sign(pAgF - pAgU).ravel()
    pFgA_z = np.abs(norm.ppf(p_vals / 2)) * z_sign
    fdr_thresh = stats.fdr(p_vals, q)
    pFgA_z_FDR = imageutils.threshold_img(
        pFgA_z, fdr_thresh, p_vals, mask_out='above')

    # Retain any images we may want to save or access later
    self.images = {
        'pAgF': pAgF,
        'pFgA': pFgA,
        ('pAgF_given_pF=%0.2f' % prior_pF): pAgF_prior,
        ('pFgA_given_pF=%0.2f' % prior_pF): pFgA_prior,
        'pAgF_z': pAgF_z,
        'pFgA_z': pFgA_z,
        ('pAgF_z_FDR_%s' % q): pAgF_z_FDR,
        ('pFgA_z_FDR_%s' % q): pFgA_z_FDR
    }

    # Mask out all voxels below num_studies threshold
    if min_studies > 0:
        # numbers.Integral also matches NumPy integer scalars, which are
        # not instances of the built-in int on Python 3.
        if isinstance(min_studies, numbers.Integral):
            # Recalculate as proportion
            min_studies = float(min_studies) / n_mappables
        vox_to_exclude = np.where(pA < min_studies)[0]  # Create mask
        # Mask each image (in place)
        for img in self.images.values():
            img[vox_to_exclude] = 0
def __init__(self, dataset, ids, ids2=None, **kwargs):
    """ Initialize a new MetaAnalysis instance and run an analysis.

    Args:
        dataset: A Dataset instance.
        ids: A list of Mappable IDs to include in the meta-analysis.
        ids2: Optional second list of Mappable IDs. If passed, the set of
            studies will be restricted to the union of ids and ids2 before
            performing the meta-analysis. This is useful for meta-analytic
            contrasts, as the resulting images will in effect identify
            regions that are reported/activated more frequently in one
            list than in the other.
        kwargs: Additional optional arguments. Currently implemented:
            q: The FDR threshold to use when correcting for multiple
                comparisons. Set to .05 by default.
            prior: The prior probability p(S) used when recomputing the
                conditional probability maps (the '*_unif' images).
                Set to 0.5 by default.
            Unrecognized keyword arguments are ignored.

    Sets self.dataset, self.selected_ids, self.selected_id_indices and
    self.images (a dict mapping image names to the computed arrays).
    """
    prior_pS = kwargs.get('prior', 0.5)
    # Set voxel-wise FDR to .05 unless explicitly specified
    q = kwargs.get('q', 0.05)

    self.dataset = dataset
    mt = dataset.image_table
    self.selected_ids = list(set(mt.ids) & set(ids))
    self.selected_id_indices = np.in1d(mt.ids, ids)

    # If ids2 is provided, we only use mappables explicitly in either ids
    # or ids2. Otherwise, all mappables not in the ids list are used as
    # the control condition.
    # Identity test on purpose: `ids2 == None` is evaluated element-wise
    # when ids2 is a NumPy array and cannot be used as a condition.
    if ids2 is None:
        unselected_id_indices = ~self.selected_id_indices
    else:
        unselected_id_indices = np.in1d(mt.ids, ids2)

    # Calculate different count variables.
    # The control count is taken from the mask itself so that it stays
    # correct when ids2 restricts the control set; deriving it from
    # len(mt.ids) would count studies that belong to neither list.
    n_selected = len(self.selected_ids)
    n_unselected = np.sum(unselected_id_indices)
    n_mappables = n_selected + n_unselected

    n_selected_active_voxels = mt.data.dot(self.selected_id_indices)
    n_unselected_active_voxels = mt.data.dot(unselected_id_indices)

    # Nomenclature for variables below: p = probability, S = selected,
    # g = given, U = unselected, A = activation. So, e.g.,
    # pAgS = p(A|S) = probability of activation in voxel given that the
    # mappable is selected (i.e., is included in the ids list passed to
    # the constructor).
    pS = (n_selected + 1.0) / (n_mappables + 2)

    # Conditional probabilities, with Laplace smoothing.
    # p(A) must count activations in both groups: built from the selected
    # counts alone, pAgS * pS / pA cancels to (n_selected + 1) /
    # (n_selected + 2), i.e. the same constant for every voxel.
    n_active_voxels = n_selected_active_voxels + n_unselected_active_voxels
    pA = (n_active_voxels + 1.0) / (n_mappables + 2)
    pAgS = (n_selected_active_voxels + 1.0) / (n_selected + 2)
    pAgU = (n_unselected_active_voxels + 1.0) / (n_unselected + 2)
    pSgA = pAgS * pS / pA

    # Recompute conditionals with uniform prior
    pAgS_unif = prior_pS * pAgS + (1 - prior_pS) * pAgU
    pSgA_unif = pAgS * prior_pS / pAgS_unif

    # One-way chi-square test for consistency of activation
    p_vals = stats.one_way(np.squeeze(n_selected_active_voxels), n_selected)
    # Clamp tiny p-values so norm.ppf does not underflow to -inf
    p_vals[p_vals < 1e-240] = 1e-240
    z_sign = np.sign(
        n_selected_active_voxels - np.mean(n_selected_active_voxels)).ravel()
    pAgS_z = np.abs(norm.ppf(p_vals / 2)) * z_sign
    fdr_thresh = stats.fdr(p_vals, q)
    pAgS_z_FDR = imageutils.threshold_img(
        pAgS_z, fdr_thresh, p_vals, mask_out='above')

    # Two-way chi-square for specificity of activation
    cells = np.squeeze(
        np.array([[n_selected_active_voxels, n_unselected_active_voxels],
                  [n_selected - n_selected_active_voxels,
                   n_unselected - n_unselected_active_voxels]]).T)
    p_vals = stats.two_way(cells)
    # Clamp tiny p-values so norm.ppf does not underflow to -inf
    p_vals[p_vals < 1e-240] = 1e-240
    z_sign = np.sign(pAgS - pAgU).ravel()
    pSgA_z = np.abs(norm.ppf(p_vals / 2)) * z_sign
    fdr_thresh = stats.fdr(p_vals, q)
    pSgA_z_FDR = imageutils.threshold_img(
        pSgA_z, fdr_thresh, p_vals, mask_out='above')

    # Retain any images we may want to save or access later
    self.images = {
        'pAgS': pAgS,
        'pSgA': pSgA,
        'pAgS_unif': pAgS_unif,
        'pSgA_unif': pSgA_unif,
        'pAgS_z': pAgS_z,
        'pSgA_z': pSgA_z,
        ('pAgS_z_FDR_%s' % q): pAgS_z_FDR,
        ('pSgA_z_FDR_%s' % q): pSgA_z_FDR
    }
def __init__(self, dataset, ids, ids2=None, **kwargs):
    """ Initialize a new instance and run the meta-analysis.

    Args:
        dataset: Object whose image_table attribute provides `ids` and
            `data`, as read below.
        ids: A list of Mappable IDs to include in the meta-analysis.
        ids2: Optional second list of Mappable IDs. If passed, only
            mappables in ids2 are used as the control condition;
            otherwise all mappables not in ids are used.
        kwargs: Additional optional arguments. Currently implemented:
            prior: The prior probability p(S) used when recomputing the
                conditional probability maps (the '*_unif' images).
                Set to 0.5 by default.
            q: The FDR threshold to use when correcting for multiple
                comparisons. Set to .01 by default.
            Unrecognized keyword arguments are ignored.

    Sets self.dataset, self.selected_ids, self.selected_id_indices and
    self.images (a dict mapping image names to the computed arrays).
    Progress messages are printed to standard output.
    """
    prior_pS = kwargs.get('prior', 0.5)
    # Set voxel-wise FDR to .01 unless explicitly specified
    q = kwargs.get('q', 0.01)

    self.dataset = dataset
    mt = dataset.image_table
    self.selected_ids = list(set(mt.ids) & set(ids))
    self.selected_id_indices = np.in1d(mt.ids, ids)

    # If ids2 is provided, we only use mappables explicitly in either ids
    # or ids2. Otherwise, all mappables not in the ids list are used as
    # the control condition.
    # Identity test on purpose: `ids2 == None` is evaluated element-wise
    # when ids2 is a NumPy array and cannot be used as a condition.
    if ids2 is None:
        unselected_id_indices = ~self.selected_id_indices
    else:
        unselected_id_indices = np.in1d(mt.ids, ids2)

    # Calculate different count variables.
    # print(...) with a single string argument emits the same text on
    # Python 2 and Python 3.
    print("Calculating counts...")
    # The control count is taken from the mask itself so that it stays
    # correct when ids2 restricts the control set; deriving it from
    # len(mt.ids) would count studies that belong to neither list.
    n_selected = len(self.selected_ids)
    n_unselected = np.sum(unselected_id_indices)
    n_mappables = n_selected + n_unselected

    n_selected_active_voxels = mt.data.dot(self.selected_id_indices)
    n_unselected_active_voxels = mt.data.dot(unselected_id_indices)

    # Nomenclature for variables below: p = probability, S = selected,
    # g = given, U = unselected, A = activation. So, e.g.,
    # pAgS = p(A|S) = probability of activation in voxel given that the
    # mappable is selected (i.e., is included in the ids list passed to
    # the constructor).
    pS = (n_selected + 1.0) / (n_mappables + 2)

    # Conditional probabilities, with Laplace smoothing
    print("Calculating conditional probabilities...")
    # p(A) must count activations in both groups: built from the selected
    # counts alone, pAgS * pS / pA cancels to (n_selected + 1) /
    # (n_selected + 2), i.e. the same constant for every voxel.
    n_active_voxels = n_selected_active_voxels + n_unselected_active_voxels
    pA = (n_active_voxels + 1.0) / (n_mappables + 2)
    pAgS = (n_selected_active_voxels + 1.0) / (n_selected + 2)
    pAgU = (n_unselected_active_voxels + 1.0) / (n_unselected + 2)
    pSgA = pAgS * pS / pA

    # Recompute conditionals with uniform prior
    print("Recomputing with uniform priors...")
    pAgS_unif = prior_pS * pAgS + (1 - prior_pS) * pAgU
    pSgA_unif = pAgS * prior_pS / pAgS_unif

    # One-way chi-square test for consistency of activation
    p_vals = stats.one_way(np.squeeze(n_selected_active_voxels), n_selected)
    # Clamp tiny p-values so norm.ppf does not underflow to -inf
    p_vals[p_vals < 1e-240] = 1e-240
    z_sign = np.sign(
        n_selected_active_voxels - np.mean(n_selected_active_voxels)).ravel()
    pAgS_z = np.abs(norm.ppf(p_vals / 2)) * z_sign
    fdr_thresh = stats.fdr(p_vals, q)
    pAgS_z_FDR = imageutils.threshold_img(
        pAgS_z, fdr_thresh, p_vals, mask_out='above')

    # Two-way chi-square for specificity of activation
    cells = np.squeeze(
        np.array([[n_selected_active_voxels, n_unselected_active_voxels],
                  [n_selected - n_selected_active_voxels,
                   n_unselected - n_unselected_active_voxels]]).T)
    p_vals = stats.two_way(cells)
    # Clamp tiny p-values so norm.ppf does not underflow to -inf
    p_vals[p_vals < 1e-240] = 1e-240
    z_sign = np.sign(pAgS - pAgU).ravel()
    pSgA_z = np.abs(norm.ppf(p_vals / 2)) * z_sign
    fdr_thresh = stats.fdr(p_vals, q)
    pSgA_z_FDR = imageutils.threshold_img(
        pSgA_z, fdr_thresh, p_vals, mask_out='above')

    # Retain any images we may want to save later
    self.images = {
        'pAgS': pAgS,
        'pSgA': pSgA,
        'pAgS_unif': pAgS_unif,
        'pSgA_unif': pSgA_unif,
        'pAgS_z': pAgS_z,
        'pSgA_z': pSgA_z,
        ('pAgS_z_FDR_%s' % q): pAgS_z_FDR,
        ('pSgA_z_FDR_%s' % q): pSgA_z_FDR
    }