Example #1
0
    def __init__(self, dataset, ids, ids2=None, q=0.01, prior=0.5, min_studies=1):
        """ Initialize a new MetaAnalysis instance and run an analysis.

        The analysis runs entirely inside the constructor. On return, the instance
        exposes self.dataset, self.selected_ids, self.selected_id_indices and
        self.images (a dict mapping image names to voxel-wise arrays).

        Args:
            dataset: A Dataset instance.
            ids: A list of Mappable IDs to include in the meta-analysis.
            ids2: Optional second list of Mappable IDs. If passed, the set of studies will
                be restricted to the union of ids and ids2 before performing the meta-analysis.
                This is useful for meta-analytic contrasts, as the resulting images will in
                effect identify regions that are reported/activated more frequently in one
                list than in the other.
            q: The FDR threshold to use when correcting for multiple comparisons. Set to
                .01 by default.
            prior: The prior to use when calculating conditional probabilities. This is the
                prior probability of a feature being used in a study (i.e., p(F)). For example,
                if set to 0.25, the analysis will assume that 1/4 of studies load on the target
                feature, as opposed to the empirically estimated p(F), which is len(ids) /
                total number of studies in the dataset. If prior is not passed, defaults to 0.5,
                reflecting an effort to put all terms on level footing and avoid undue influence
                of base rates (because some terms are much more common than others). Note that
                modifying the prior will only affect the effect size/probability maps, and
                not the statistical inference (z-score) maps.
            min_studies: Integer or float indicating which voxels to mask out from results
                due to lack of stability. If an integer (Python or NumPy) is passed, all
                voxels that activate in fewer than this number of studies will be ignored
                (i.e., a value of 0 will be assigned in all output images). If a float in
                the range of 0 - 1 is passed, this will be interpreted as a proportion to
                use as the cut-off (e.g., passing 0.03 will exclude all voxels active in
                fewer than 3% of the entire dataset). Defaults to 1, meaning all voxels
                that activate in at least one study will be kept.
        """

        self.dataset = dataset
        mt = dataset.image_table
        self.selected_ids = list(set(mt.ids) & set(ids))
        self.selected_id_indices = np.in1d(mt.ids, ids)

        # If ids2 is provided, we only use mappables explicitly in either ids or ids2.
        # Otherwise, all mappables not in the ids list are used as the control
        # condition.
        # The test must be `is None`: `ids2 == None` is evaluated elementwise when
        # ids2 is a NumPy array, which makes the conditional ambiguous.
        if ids2 is None:
            unselected_id_indices = ~self.selected_id_indices
        else:
            unselected_id_indices = np.in1d(mt.ids, ids2)

        # Calculate different count variables
        logger.debug("Calculating counts...")
        n_selected = len(self.selected_ids)
        n_unselected = np.sum(unselected_id_indices)
        n_mappables = n_selected + n_unselected

        # Per-voxel number of selected / control studies reporting activation.
        n_selected_active_voxels = mt.data.dot(self.selected_id_indices)
        n_unselected_active_voxels = mt.data.dot(unselected_id_indices)

        # Nomenclature for variables below: p = probability, F = feature present, g = given,
        # U = unselected, A = activation. So, e.g., pAgF = p(A|F) = probability of activation
        # in a voxel if we know that the feature is present in a study.
        pF = (n_selected * 1.0) / n_mappables
        # NOTE(review): the numerator sums over every column of mt.data, while
        # n_mappables only counts ids + ids2 when ids2 is given -- confirm intended.
        pA = np.array((mt.data.sum(axis=1) * 1.0) / n_mappables).squeeze()

        # Conditional probabilities
        logger.debug("Calculating conditional probabilities...")
        pAgF = n_selected_active_voxels * 1.0 / n_selected
        pAgU = n_unselected_active_voxels * 1.0 / n_unselected
        pFgA = pAgF * pF / pA

        # Recompute conditionals with uniform prior
        logger.debug("Recomputing with uniform priors...")
        pAgF_prior = prior * pAgF + (1 - prior) * pAgU
        pFgA_prior = pAgF * prior / pAgF_prior

        def p_to_z(p, sign):
            """ Convert an array of p-values into signed z-scores. """
            # Halve the p-values before the inverse normal CDF. This also creates
            # a new array, so the caller's p-values are not modified below.
            p = p/2
            # prevent underflow
            p[p < 1e-240] = 1e-240
            # Convert to z and assign tail
            z = np.abs(norm.ppf(p)) * sign
            # Set invalid voxels to 0
            z[np.isinf(z)] = 0.0
            return z

        # One-way chi-square test for consistency of activation
        p_vals = stats.one_way(np.squeeze(n_selected_active_voxels), n_selected)
        # Sign is positive where a voxel is active in more selected studies than average.
        z_sign = np.sign(n_selected_active_voxels - np.mean(n_selected_active_voxels)).ravel()
        pAgF_z = p_to_z(p_vals, z_sign)
        fdr_thresh = stats.fdr(p_vals, q)
        pAgF_z_FDR = imageutils.threshold_img(pAgF_z, fdr_thresh, p_vals, mask_out='above')

        # Two-way chi-square for specificity of activation
        # cells holds, per voxel, the 2x2 table of active/inactive by selected/unselected.
        cells = np.squeeze(
            np.array([[n_selected_active_voxels, n_unselected_active_voxels],
                      [n_selected - n_selected_active_voxels, n_unselected - n_unselected_active_voxels]]).T)
        p_vals = stats.two_way(cells)
        z_sign = np.sign(pAgF - pAgU).ravel()
        pFgA_z = p_to_z(p_vals, z_sign)
        fdr_thresh = stats.fdr(p_vals, q)
        pFgA_z_FDR = imageutils.threshold_img(pFgA_z, fdr_thresh, p_vals, mask_out='above')

        # Retain any images we may want to save or access later
        self.images = {
            'pA': pA,
            'pAgF': pAgF,
            'pFgA': pFgA,
            ('pAgF_given_pF=%0.2f' % prior): pAgF_prior,
            ('pFgA_given_pF=%0.2f' % prior): pFgA_prior,
            'pAgF_z': pAgF_z,
            'pFgA_z': pFgA_z,
            ('pAgF_z_FDR_%s' % q): pAgF_z_FDR,
            ('pFgA_z_FDR_%s' % q): pFgA_z_FDR
        }

        # Mask out all voxels below num_studies threshold
        if min_studies > 0:
            # Integer thresholds are study counts. NumPy integer scalars are included
            # so that e.g. np.int64(3) is not mistaken for a proportion of 3.0.
            if isinstance(min_studies, (int, np.integer)):
                min_studies = float(
                    min_studies) / n_mappables  # Recalculate as proportion
            # The mask is computed before the loop because 'pA' is itself one of
            # the images that gets zeroed.
            vox_to_exclude = np.where(pA < min_studies)[0]  # Create mask
            # Mask each image
            for k in self.images:
                self.images[k][vox_to_exclude] = 0
Example #2
0
    def __init__(self, dataset, ids, ids2=None, **kwargs):
        """ Initialize a new MetaAnalysis instance and run an analysis.

        The analysis runs entirely inside the constructor. On return, the instance
        exposes self.dataset, self.selected_ids, self.selected_id_indices and
        self.images (a dict mapping image names to voxel-wise arrays).

        Args:
            dataset: A Dataset instance.
            ids: A list of Mappable IDs to include in the meta-analysis.
            ids2: Optional second list of Mappable IDs. If passed, the set of studies will
                be restricted to the union of ids and ids2 before performing the meta-analysis.
                This is useful for meta-analytic contrasts, as the resulting images will in
                effect identify regions that are reported/activated more frequently in one
                list than in the other.
            kwargs: Additional optional arguments. Currently implemented:
                q: The FDR threshold to use when correcting for multiple comparisons. Set to
                    .05 by default.
                prior: The prior to use when calculating conditional probabilities. This is the
                    prior probability of a feature being used in a study (i.e., p(F)). For
                    example, if set to 0.25, the analysis will assume that 1/4 of studies load
                    on the target feature, as opposed to the empirically estimated p(F), which
                    is len(ids) / total number of studies in the dataset. If prior is not
                    passed, defaults to 0.5, reflecting an effort to put all terms on level
                    footing and avoid undue influence of base rates (because some terms are
                    much more common than others). Note that modifying the prior will only
                    affect the effect size/probability maps, and not the statistical
                    inference (z-score) maps.
                min_studies: Integer or float indicating which voxels to mask out from results
                    due to lack of stability. If an integer (Python or NumPy) is passed, all
                    voxels that activate in fewer than this number of studies will be ignored
                    (i.e., a value of 0 will be assigned in all output images). If a float in
                    the range of 0 - 1 is passed, this will be interpreted as a proportion to
                    use as the cut-off (e.g., passing 0.03 will exclude all voxels active in
                    fewer than 3% of the entire dataset). Defaults to 0, meaning all voxels
                    will be kept.
        """

        ###  Set optional parameter defaults ###
        q = kwargs.get('q', 0.05)  # voxel-wise FDR rate
        prior_pF = kwargs.get(
            'prior', 0.5)  # prior probability of feature being present
        min_studies = kwargs.get(
            'min_studies', 0)  # min. number of studies for voxel inclusion

        self.dataset = dataset
        mt = dataset.image_table
        self.selected_ids = list(set(mt.ids) & set(ids))
        self.selected_id_indices = np.in1d(mt.ids, ids)

        # If ids2 is provided, we only use mappables explicitly in either ids or ids2.
        # Otherwise, all mappables not in the ids list are used as the control condition.
        # The test must be `is None`: `ids2 == None` is evaluated elementwise when
        # ids2 is a NumPy array, which makes the conditional ambiguous.
        if ids2 is None:
            unselected_id_indices = ~self.selected_id_indices
        else:
            unselected_id_indices = np.in1d(mt.ids, ids2)

        # Calculate different count variables
        n_selected = len(self.selected_ids)
        n_unselected = np.sum(unselected_id_indices)
        n_mappables = n_selected + n_unselected

        # Per-voxel number of selected / control studies reporting activation.
        n_selected_active_voxels = mt.data.dot(self.selected_id_indices)
        n_unselected_active_voxels = mt.data.dot(unselected_id_indices)

        # Nomenclature for variables below: p = probability, F = feature present, g = given,
        # U = unselected, A = activation. So, e.g., pAgF = p(A|F) = probability of activation
        # in a voxel if we know that the feature is present in a study.
        # The +1.0 / +2 terms below are Laplace smoothing.
        pF = (n_selected + 1.0) / (n_mappables + 2)
        pA = np.array(
            (mt.data.sum(axis=1) + 1.0) / (n_mappables + 2)).squeeze()

        # Conditional probabilities, with Laplace smoothing
        pAgF = (n_selected_active_voxels + 1.0) / (n_selected + 2)
        pAgU = (n_unselected_active_voxels + 1.0) / (n_unselected + 2)
        pFgA = pAgF * pF / pA

        # Recompute conditionals with uniform prior
        pAgF_prior = prior_pF * pAgF + (1 - prior_pF) * pAgU
        pFgA_prior = pAgF * prior_pF / pAgF_prior

        # One-way chi-square test for consistency of activation
        p_vals = stats.one_way(np.squeeze(n_selected_active_voxels),
                               n_selected)
        p_vals[p_vals <
               1e-240] = 1e-240  # prevents overflow due to loss of precision
        # Sign is positive where a voxel is active in more selected studies than average.
        z_sign = np.sign(n_selected_active_voxels -
                         np.mean(n_selected_active_voxels)).ravel()
        pAgF_z = np.abs(norm.ppf(p_vals / 2)) * z_sign

        fdr_thresh = stats.fdr(p_vals, q)
        pAgF_z_FDR = imageutils.threshold_img(pAgF_z,
                                              fdr_thresh,
                                              p_vals,
                                              mask_out='above')

        # Two-way chi-square for specificity of activation
        # cells holds, per voxel, the 2x2 table of active/inactive by selected/unselected.
        cells = np.squeeze(
            np.array([[n_selected_active_voxels, n_unselected_active_voxels],
                      [
                          n_selected - n_selected_active_voxels,
                          n_unselected - n_unselected_active_voxels
                      ]]).T)

        p_vals = stats.two_way(cells)
        p_vals[p_vals < 1e-240] = 1e-240  # prevents overflow
        z_sign = np.sign(pAgF - pAgU).ravel()
        pFgA_z = np.abs(norm.ppf(p_vals / 2)) * z_sign
        fdr_thresh = stats.fdr(p_vals, q)
        pFgA_z_FDR = imageutils.threshold_img(pFgA_z,
                                              fdr_thresh,
                                              p_vals,
                                              mask_out='above')

        # Retain any images we may want to save or access later
        self.images = {
            'pAgF': pAgF,
            'pFgA': pFgA,
            ('pAgF_given_pF=%0.2f' % prior_pF): pAgF_prior,
            ('pFgA_given_pF=%0.2f' % prior_pF): pFgA_prior,
            'pAgF_z': pAgF_z,
            'pFgA_z': pFgA_z,
            ('pAgF_z_FDR_%s' % q): pAgF_z_FDR,
            ('pFgA_z_FDR_%s' % q): pFgA_z_FDR
        }

        # Mask out all voxels below num_studies threshold
        if min_studies > 0:
            # Integer thresholds are study counts. NumPy integer scalars are included
            # so that e.g. np.int64(3) is not mistaken for a proportion of 3.0.
            if isinstance(min_studies, (int, np.integer)):
                min_studies = float(
                    min_studies) / n_mappables  # Recalculate as proportion
            vox_to_exclude = np.where(pA < min_studies)[0]  # Create mask
            # Mask each image
            for k in self.images:
                self.images[k][vox_to_exclude] = 0
Example #3
0
  def __init__(self, dataset, ids, ids2=None, **kwargs):
    """ Initialize a new MetaAnalysis instance and run an analysis.

    The analysis runs entirely inside the constructor. On return, the instance
    exposes self.dataset, self.selected_ids, self.selected_id_indices and
    self.images (a dict mapping image names to voxel-wise arrays).

    Args:
      dataset: A Dataset instance.
      ids: A list of Mappable IDs to include in the meta-analysis.
      ids2: Optional second list of Mappable IDs. If passed, the set of studies will
        be restricted to the union of ids and ids2 before performing the meta-analysis.
        This is useful for meta-analytic contrasts, as the resulting images will in
        effect identify regions that are reported/activated more frequently in one
        list than in the other.
      kwargs: Additional optional arguments. Currently implemented:
        q: The FDR threshold to use when correcting for multiple comparisons. Set to
          .05 by default.
        prior: The prior probability of a mappable being selected (p(S)) used to
          compute the 'pAgS_unif' and 'pSgA_unif' images. Set to 0.5 by default.
    """

    # Optional parameters
    prior_pS = kwargs.get('prior', 0.5)
    q = kwargs.get('q', 0.05)  # voxel-wise FDR rate

    self.dataset = dataset
    mt = dataset.image_table
    self.selected_ids = list(set(mt.ids) & set(ids))
    self.selected_id_indices = np.in1d(mt.ids, ids)

    # If ids2 is provided, we only use mappables explicitly in either ids or ids2.
    # Otherwise, all mappables not in the ids list are used as the control condition.
    # The test must be `is None`: `ids2 == None` is evaluated elementwise when
    # ids2 is a NumPy array, which makes the conditional ambiguous.
    if ids2 is None:
      unselected_id_indices = ~self.selected_id_indices
    else:
      unselected_id_indices = np.in1d(mt.ids, ids2)

    # Calculate different count variables. The control count is taken from the
    # control mask itself so that it stays correct when ids2 restricts the
    # control condition to a subset of the remaining mappables.
    n_selected = len(self.selected_ids)
    n_unselected = np.sum(unselected_id_indices)
    n_mappables = n_selected + n_unselected

    # Per-voxel number of selected / control studies reporting activation.
    n_selected_active_voxels = mt.data.dot(self.selected_id_indices)
    n_unselected_active_voxels = mt.data.dot(unselected_id_indices)

    # Nomenclature for variables below: p = probability, S = selected, g = given,
    # U = unselected, A = activation. So, e.g., pAgS = p(A|S) = probability of activation
    # in voxel given that the mappable is selected (i.e., is included in the ids list
    # passed to the constructor).
    pS = (n_selected+1.0)/(n_mappables+2)

    # Conditional probabilities, with Laplace smoothing.
    # p(A) must count activations in every analyzed mappable (selected and
    # control). Using only the selected counts makes pSgA collapse to the
    # constant (n_selected+1)/(n_selected+2).
    n_active_voxels = n_selected_active_voxels + n_unselected_active_voxels
    pA = (n_active_voxels+1.0) / (n_mappables+2)
    pAgS = (n_selected_active_voxels+1.0)/(n_selected+2)
    pAgU = (n_unselected_active_voxels+1.0)/(n_unselected+2)
    pSgA = pAgS * pS / pA

    # Recompute conditionals with uniform prior
    pAgS_unif = prior_pS * pAgS + (1-prior_pS) * pAgU
    pSgA_unif = pAgS * prior_pS / pAgS_unif

    # One-way chi-square test for consistency of activation
    p_vals = stats.one_way(np.squeeze(n_selected_active_voxels), n_selected)
    p_vals[p_vals < 1e-240] = 1e-240  # prevents overflow due to loss of precision
    # Sign is positive where a voxel is active in more selected studies than average.
    z_sign = np.sign(n_selected_active_voxels - np.mean(n_selected_active_voxels)).ravel()
    pAgS_z = np.abs(norm.ppf(p_vals/2)) * z_sign

    fdr_thresh = stats.fdr(p_vals, q)
    pAgS_z_FDR = imageutils.threshold_img(pAgS_z, fdr_thresh, p_vals, mask_out='above')

    # Two-way chi-square for specificity of activation
    # cells holds, per voxel, the 2x2 table of active/inactive by selected/unselected.
    cells = np.squeeze(np.array([[n_selected_active_voxels, n_unselected_active_voxels],
      [n_selected-n_selected_active_voxels, n_unselected-n_unselected_active_voxels]]).T)

    p_vals = stats.two_way(cells)
    p_vals[p_vals < 1e-240] = 1e-240  # prevents overflow
    z_sign = np.sign(pAgS - pAgU).ravel()
    pSgA_z = np.abs(norm.ppf(p_vals/2)) * z_sign
    fdr_thresh = stats.fdr(p_vals, q)
    pSgA_z_FDR = imageutils.threshold_img(pSgA_z, fdr_thresh, p_vals, mask_out='above')

    # Retain any images we may want to save or access later
    self.images = {
      'pAgS': pAgS,
      'pSgA': pSgA,
      'pAgS_unif': pAgS_unif,
      'pSgA_unif': pSgA_unif,
      'pAgS_z': pAgS_z,
      'pSgA_z': pSgA_z,
      ('pAgS_z_FDR_%s' % q): pAgS_z_FDR,
      ('pSgA_z_FDR_%s' % q): pSgA_z_FDR
    }
Example #4
0
	def __init__(self, dataset, ids, ids2=None, **kwargs):
		""" Initialize a new MetaAnalysis instance and run an analysis.

		The analysis runs entirely inside the constructor. On return, the instance
		exposes self.dataset, self.selected_ids, self.selected_id_indices and
		self.images (a dict mapping image names to voxel-wise arrays).

		Args:
			dataset: A Dataset instance; its image_table attribute is read.
			ids: A list of Mappable IDs to include in the meta-analysis.
			ids2: Optional second list of Mappable IDs. If passed, only mappables in
				ids or ids2 are used; otherwise all mappables not in ids serve as the
				control condition.
			kwargs: Additional optional arguments. Currently implemented:
				q: The FDR threshold to use when correcting for multiple comparisons.
					Set to .01 by default.
				prior: The prior probability of a mappable being selected (p(S)) used
					to compute the 'pAgS_unif' and 'pSgA_unif' images. Set to 0.5 by
					default.
		"""

		# Optional parameters
		prior_pS = kwargs.get('prior', 0.5)
		q = kwargs.get('q', 0.01)  # voxel-wise FDR rate

		self.dataset = dataset
		mt = dataset.image_table
		self.selected_ids = list(set(mt.ids) & set(ids))
		self.selected_id_indices = np.in1d(mt.ids, ids)

		# If ids2 is provided, we only use mappables explicitly in either ids or ids2.
		# Otherwise, all mappables not in the ids list are used as the control condition.
		# The test must be `is None`: `ids2 == None` is evaluated elementwise when
		# ids2 is a NumPy array, which makes the conditional ambiguous.
		if ids2 is None:
			unselected_id_indices = ~self.selected_id_indices
		else:
			unselected_id_indices = np.in1d(mt.ids, ids2)

		# Calculate different count variables. The control count is taken from the
		# control mask itself so that it stays correct when ids2 restricts the
		# control condition to a subset of the remaining mappables.
		# Single-argument print(...) behaves identically on Python 2 and Python 3.
		print("Calculating counts...")
		n_selected = len(self.selected_ids)
		n_unselected = np.sum(unselected_id_indices)
		n_mappables = n_selected + n_unselected

		# Per-voxel number of selected / control studies reporting activation.
		n_selected_active_voxels = mt.data.dot(self.selected_id_indices)
		n_unselected_active_voxels = mt.data.dot(unselected_id_indices)

		# Nomenclature for variables below: p = probability, S = selected, g = given,
		# U = unselected, A = activation. So, e.g., pAgS = p(A|S) = probability of activation
		# in voxel given that the mappable is selected (i.e., is included in the ids list
		# passed to the constructor).
		pS = (n_selected+1.0)/(n_mappables+2)

		# Conditional probabilities, with Laplace smoothing.
		# p(A) must count activations in every analyzed mappable (selected and
		# control). Using only the selected counts makes pSgA collapse to the
		# constant (n_selected+1)/(n_selected+2).
		print("Calculating conditional probabilities...")
		n_active_voxels = n_selected_active_voxels + n_unselected_active_voxels
		pA = (n_active_voxels+1.0) / (n_mappables+2)
		pAgS = (n_selected_active_voxels+1.0)/(n_selected+2)
		pAgU = (n_unselected_active_voxels+1.0)/(n_unselected+2)
		pSgA = pAgS * pS / pA

		# Recompute conditionals with uniform prior
		print("Recomputing with uniform priors...")
		pAgS_unif = prior_pS * pAgS + (1-prior_pS) * pAgU
		pSgA_unif = pAgS * prior_pS / pAgS_unif

		# One-way chi-square test for consistency of activation
		p_vals = stats.one_way(np.squeeze(n_selected_active_voxels), n_selected)
		p_vals[p_vals < 1e-240] = 1e-240  # prevents overflow due to loss of precision
		# Sign is positive where a voxel is active in more selected studies than average.
		z_sign = np.sign(n_selected_active_voxels - np.mean(n_selected_active_voxels)).ravel()
		pAgS_z = np.abs(norm.ppf(p_vals/2)) * z_sign

		fdr_thresh = stats.fdr(p_vals, q)
		pAgS_z_FDR = imageutils.threshold_img(pAgS_z, fdr_thresh, p_vals, mask_out='above')

		# Two-way chi-square for specificity of activation
		# cells holds, per voxel, the 2x2 table of active/inactive by selected/unselected.
		cells = np.squeeze(np.array([[n_selected_active_voxels, n_unselected_active_voxels],
			[n_selected-n_selected_active_voxels, n_unselected-n_unselected_active_voxels]]).T)
		p_vals = stats.two_way(cells)
		p_vals[p_vals < 1e-240] = 1e-240  # prevents overflow
		z_sign = np.sign(pAgS - pAgU).ravel()
		pSgA_z = np.abs(norm.ppf(p_vals/2)) * z_sign
		fdr_thresh = stats.fdr(p_vals, q)
		pSgA_z_FDR = imageutils.threshold_img(pSgA_z, fdr_thresh, p_vals, mask_out='above')

		# Retain any images we may want to save later
		self.images = {
			'pAgS': pAgS,
			'pSgA': pSgA,
			'pAgS_unif': pAgS_unif,
			'pSgA_unif': pSgA_unif,
			'pAgS_z': pAgS_z,
			'pSgA_z': pSgA_z,
			('pAgS_z_FDR_%s' % q): pAgS_z_FDR,
			('pSgA_z_FDR_%s' % q): pSgA_z_FDR
		}