Example #1
 def __init__(self, identifier):
     """Assemble the cross-validated linear-regression/Pearson-r metric and delegate to the base class."""
     cv_kwargs = dict(splits=5, kfold=True, split_coord='stimulus_id',
                      stratification_coord='sentence_id')
     metric = CrossRegressedCorrelation(
         regression=linear_regression(xarray_kwargs=dict(stimulus_coord='stimulus_id')),  # word
         correlation=pearsonr_correlation(xarray_kwargs=dict(correlation_coord='stimulus_id')),
         crossvalidation_kwargs=cv_kwargs)
     super(Fedorenko2016Encoding, self).__init__(identifier=identifier, metric=metric)
Example #2
 def __init__(self, identifier, split_coord='word', unique_split_values=False):
     """Set up logging, lazy assembly loading, the cross-validated metric, and ceiling helpers."""
     self._logger = logging.getLogger(fullname(self))
     self._identifier = identifier
     # defer loading the target assembly until first access
     self._target_assembly = LazyLoad(self._load_assembly)
     self._metric = CrossRegressedCorrelation(
         regression=linear_regression(xarray_kwargs=dict(
             stimulus_coord='word_id',
             neuroid_coord='subject_id')),  # used for sorting -- keep at word_id
         correlation=pearsonr_correlation(xarray_kwargs=dict(
             correlation_coord='word_id',
             neuroid_coord='subject_id')),  # used for sorting -- keep at word_id
         crossvalidation_kwargs=dict(splits=5,
                                     kfold=True,
                                     split_coord=split_coord,
                                     stratification_coord='sentence_id',
                                     unique_split_values=unique_split_values))
     self._cross_subject = CartesianProduct(dividers=['subject_id'])
     self._ceiler = self.ManySubjectExtrapolationCeiling(subject_column='subject_id')
Example #3
 def __init__(self, **kwargs):
     """Build the decoding metric: cross-regressed correlation with the regression direction inverted."""
     regression = linear_regression(xarray_kwargs=dict(stimulus_coord='stimulus_id'))
     correlation = pearsonr_correlation(xarray_kwargs=dict(correlation_coord='stimulus_id'))
     cross_regressed = CrossRegressedCorrelation(
         regression=regression,
         correlation=correlation,
         crossvalidation_kwargs=dict(split_coord='stimulus_id', stratification_coord=None))
     super(PereiraDecoding, self).__init__(metric=Invert(cross_regressed), **kwargs)
Example #4
    def __init__(self, *args, **kwargs):
        """Run the base initialization, then install the per-fROI cross-validated metric."""
        super(Blank2014fROIEncoding, self).__init__(*args, **kwargs)

        cv_kwargs = dict(splits=5, kfold=True, split_coord='stimulus_id',
                         stratification_coord='story')
        self._metric = CrossRegressedCorrelation(
            regression=linear_regression(xarray_kwargs=dict(
                stimulus_coord='stimulus_id', neuroid_coord='fROI_area')),
            correlation=pearsonr_correlation(xarray_kwargs=dict(
                correlation_coord='stimulus_id', neuroid_coord='fROI_area')),
            crossvalidation_kwargs=cv_kwargs)
Example #5
    def __init__(self, identifier, bold_shift=4):
        """Store the identifier, lazily load the BOLD-shifted assembly, and set up metric and ceiler."""
        self._identifier = identifier
        # lazy so the assembly is only loaded (with the given bold_shift) on first use
        self._target_assembly = LazyLoad(lambda: self._load_assembly(bold_shift))
        self._metric = CrossRegressedCorrelation(
            regression=linear_regression(xarray_kwargs=dict(
                stimulus_coord='stimulus_id', neuroid_coord='neuroid_id')),
            correlation=pearsonr_correlation(xarray_kwargs=dict(
                correlation_coord='stimulus_id', neuroid_coord='neuroid_id')),
            crossvalidation_kwargs=dict(splits=5, kfold=True,
                                        split_coord='stimulus_id',
                                        stratification_coord='story'))
        self._ceiler = ExtrapolationCeiling(subject_column='subject_UID',
                                            post_process=self.post_process_ceilings)
Example #6
 def __init__(self, *args, **kwargs):
     """Initialize the base benchmark, then replace the metric with a sentence-split variant."""
     super(Futrell2018SentencesEncoding, self).__init__(*args, **kwargs)
     cv_kwargs = dict(splits=5, kfold=True, unique_split_values=True,
                      split_coord='sentence_id', stratification_coord=None)
     self._metric = CrossRegressedCorrelation(
         regression=linear_regression(xarray_kwargs=dict(
             stimulus_coord='word_id', neuroid_coord='subject_id')),
         correlation=pearsonr_correlation(xarray_kwargs=dict(
             correlation_coord='word_id', neuroid_coord='subject_id')),
         crossvalidation_kwargs=cv_kwargs)
Example #7
 def __init__(self, identifier):
     """Set up logging, lazy assembly loading, and the word-level cross-validated metric."""
     self._logger = logging.getLogger(fullname(self))
     self._identifier = identifier
     # defer loading the target assembly until first access
     self._target_assembly = LazyLoad(self._load_assembly)
     cv_kwargs = dict(splits=5, kfold=True, split_coord='word_id',
                      stratification_coord='sentence_id')
     self._metric = CrossRegressedCorrelation(
         regression=linear_regression(xarray_kwargs=dict(
             stimulus_coord='word_id', neuroid_coord='subject_id')),
         correlation=pearsonr_correlation(xarray_kwargs=dict(
             correlation_coord='word_id', neuroid_coord='subject_id')),
         crossvalidation_kwargs=cv_kwargs)
Example #8
def load_Pereira2018_Blank_languageresiduals():
    """Return language-network residuals after regressing out non-language predictions.

    Reuses the PereiraEncoding benchmark's assembly and cross-validation machinery,
    but replaces the correlation step with a hook that stores
    ``language_target - nonlanguage_prediction`` residuals. The residuals from all
    cross-validation folds / experiments / language networks are merged into a single
    assembly of the same type as the language assembly and returned.
    """
    # hijack the corresponding encoding benchmark to regress, but then store residuals instead of correlate
    from neural_nlp.benchmarks.neural import PereiraEncoding
    benchmark = PereiraEncoding()
    assembly, cross = benchmark._target_assembly, benchmark._cross
    residuals = []

    def store_residuals(nonlanguage_prediction, language_target):
        # stands in for the correlation step of CrossRegressedCorrelation:
        # record the residual in the enclosing list and return a throwaway score
        residual = language_target - nonlanguage_prediction
        residuals.append(residual)
        return Score([0],
                     coords={'neuroid_id': ('neuroid', [0])},
                     dims=['neuroid'])  # dummy score

    pseudo_metric = CrossRegressedCorrelation(regression=linear_regression(
        xarray_kwargs=dict(stimulus_coord='stimulus_id')),
                                              correlation=store_residuals,
                                              crossvalidation_kwargs=dict(
                                                  splits=5,
                                                  kfold=True,
                                                  split_coord='stimulus_id',
                                                  stratification_coord=None))

    # separate language from non-language networks
    language_assembly = assembly[{
        'neuroid': [
            atlas in ['DMN', 'MD', 'language']
            for atlas in assembly['atlas'].values
        ]
    }]
    nonlanguage_assembly = assembly[{
        'neuroid': [
            atlas in ['visual', 'auditory']
            for atlas in assembly['atlas'].values
        ]
    }]

    # run
    def apply_cross(source_assembly, target_assembly):
        # filter experiment: keep only presentations shared with the target assembly
        source_assembly = source_assembly[{
            'presentation': [
                stimulus_id in target_assembly['stimulus_id'].values
                for stimulus_id in source_assembly['stimulus_id'].values
            ]
        }]
        assert all(source_assembly['stimulus_id'].values ==
                   target_assembly['stimulus_id'].values)
        # filter subjects that have not done this experiment
        source_assembly = source_assembly.dropna('neuroid')
        # for the target assembly, it's going to become awkward if we just drop those neuroids.
        # instead, we set them to zero which makes for simple zero regression weights.
        target_assembly = target_assembly.fillna(0)
        # this will regress from joint visual+auditory neural space to one of the language networks
        return pseudo_metric(source_assembly, target_assembly)

    # drives apply_cross once per cross-validation division; residuals accumulate as a side effect
    cross(language_assembly,
          apply=lambda cross_assembly: apply_cross(nonlanguage_assembly,
                                                   cross_assembly))

    # combine residuals
    assert len(
        residuals
    ) == 5 * 2 * 3  # 5-fold CV, 2 experiments, 3 language brain networks
    # ensure uniqueness
    neuroid_ids, stimulus_ids = [], []
    for residual in residuals:
        neuroid_ids += residual['neuroid_id'].values.tolist()
        stimulus_ids += residual['stimulus_id'].values.tolist()
    assert len(neuroid_ids) == len(language_assembly['neuroid']) * 5 * 2
    assert len(set(neuroid_ids)) == len(
        set(language_assembly['neuroid_id'].values))
    assert len(stimulus_ids) == len(language_assembly['presentation']) * 3
    assert len(set(stimulus_ids)) == len(
        set(language_assembly['stimulus_id'].values))
    residuals = merge_data_arrays(residuals)
    # re-wrap as the same assembly class and carry over the stimulus set metadata
    residuals = type(language_assembly)(residuals)
    residuals.attrs['stimulus_set'] = assembly.stimulus_set
    return residuals