Example #1
0
 def __call__(self, candidate):
     """Score *candidate* on every target story, then aggregate.

     For each story, the candidate is run on that story's stimulus set and
     the resulting activations are compared to each brain region's assembly
     via the metric.  Per-story scores are merged and averaged over stories
     and regions.
     """
     per_story_scores = []
     for story_name, assembly in self._target_assemblies.items():
         predictions = candidate(stimuli=assembly.stimulus_set)
         story_score = self._cross_region(
             assembly,
             apply=lambda region_assembly: self._metric(predictions, region_assembly))
         # tag the score with its story so that merged scores keep provenance
         story_score = story_score.expand_dims('story')
         story_score['story'] = [story_name]
         per_story_scores.append(story_score)
     merged = Score.merge(*per_story_scores)
     merged = apply_aggregate(lambda s: s.mean('story'), merged)
     return apply_aggregate(lambda s: s.mean('region'), merged)
Example #2
0
 def ceiling_normalize(self, score):
     """Average the raw per-split scores, then ceil each neuroid.

     Raw scores are first averaged across cross-validation splits and the
     per-neuroid averages are normalized by the ceiling, grouped by subject.
     """
     per_neuroid = apply_aggregate(lambda values: values.mean('split'), score.raw)
     return ceil_neuroids(per_neuroid, self.ceiling, subject_column='subject_UID')
Example #3
0
    def __call__(self, candidate):
        """Run *candidate* on the benchmark stimuli and score it against the
        target assembly, ceiling-normalized on language-atlas neuroids.
        """
        stimulus_set = self._target_assembly.attrs['stimulus_set']
        # tag every passage with a unique id so model state resets per passage
        stimulus_set.loc[:, 'passage_id'] = \
            stimulus_set['experiment'] + stimulus_set['passage_index'].astype(str)
        activations = listen_to(candidate, stimulus_set, reset_column='passage_id')
        assert set(activations['stimulus_id'].values) == \
               set(self._target_assembly['stimulus_id'].values)

        _logger.info('Scoring across experiments & atlases')
        cross_scores = self._cross(
            self._target_assembly,
            apply=lambda cross_assembly: self._apply_cross(activations, cross_assembly))
        raw_neuroids = apply_aggregate(
            lambda values: values.mean('split').mean('experiment'), cross_scores.raw)

        # Instead of ceiling each neuroid individually, the strongest possible
        # ceiling was estimated from experiment-overlapping neuroids across as
        # many subjects as possible, excluding some neuroids. Since
        # median(r/c) == median(r)/median(c), normalizing the neuroid aggregate
        # by the overall ceiling aggregate is equivalent.
        # The Pereira data also contains voxels from DMN, visual etc. atlases,
        # but only language voxels are of interest here.
        language_neuroids = raw_neuroids.sel(atlas='language', _apply_raw=False)
        return aggregate_ceiling(language_neuroids,
                                 ceiling=self.ceiling,
                                 subject_column='subject')
Example #4
0
 def ceiling_normalize(self, score):
     """Average raw scores over splits and ceil per neuroid by subject.

     If the split-averaged assembly lacks a `neuroid_id` coordinate, one is
     synthesized as "<subject_UID>.<fROI_area>" before ceiling.
     """
     per_neuroid = apply_aggregate(lambda values: values.mean('split'), score.raw)
     if not hasattr(per_neuroid, 'neuroid_id'):
         identifiers = [".".join(str(part) for part in parts)
                        for parts in zip(per_neuroid['subject_UID'].values,
                                         per_neuroid['fROI_area'].values)]
         per_neuroid['neuroid_id'] = 'neuroid', identifiers
     return ceil_neuroids(per_neuroid, self.ceiling, subject_column='subject_UID')
Example #5
0
def fROI_correlation():
    """Estimate a noise ceiling from fROI responses.

    For every (story, held-out subject) pair, the averaged responses of the
    remaining subject pool are correlated with the held-out subject's
    responses.  Scores are averaged over neuroids and stories; center/error
    are the mean/std over held-out subjects.
    """
    assembly = load_voxels()

    stories = sorted(set(assembly['story'].values))
    subjects = sorted(set(assembly['subject_UID'].values))
    correlate = pearsonr_correlation(xarray_kwargs=dict(
        correlation_coord='stimulus_id', neuroid_coord='fROI_area'))
    pairs = list(itertools.product(stories, subjects))
    split_scores = []
    for story, heldout_subject in tqdm(pairs, desc='cross-{story,subject}'):
        story_assembly = assembly[{'presentation': [s == story
                                                    for s in assembly['story'].values]}]
        subject_values = story_assembly['subject_UID'].values
        pool = story_assembly[{'neuroid': [s != heldout_subject for s in subject_values]}]
        pool = average_subregions(pool)
        heldout = story_assembly[{'neuroid': [s == heldout_subject for s in subject_values]}]
        heldout = average_subregions(heldout)
        split_score = correlate(pool, heldout)
        # drop subject-specific coords so scores from different splits merge cleanly
        kept_coords = {coord: (dims, values)
                       for coord, dims, values in walk_coords(split_score)
                       if not coord.startswith('subject_') and coord != 'neuroid_id'}
        split_score = type(split_score)(split_score.values,
                                        coords=kept_coords, dims=split_score.dims)

        split_score = split_score.expand_dims('heldout_subject').expand_dims('story')
        split_score['heldout_subject'] = [heldout_subject]
        split_score['story'] = [story]
        split_scores.append(split_score)
    correlation = Score.merge(*split_scores)

    correlation = apply_aggregate(
        lambda scores: scores.mean('neuroid').mean('story'), correlation)
    center = correlation.mean('heldout_subject')
    error = correlation.std('heldout_subject')
    score = Score([center, error],
                  coords={'aggregation': ['center', 'error'],
                          **{coord: (dims, values)
                             for coord, dims, values in walk_coords(center)}},
                  dims=('aggregation',) + center.dims)
    score.attrs[Score.RAW_VALUES_KEY] = correlation.attrs[Score.RAW_VALUES_KEY]
    return score
Example #6
0
 def _repeat(self, func):
     """Run *func* once per repetition (sharing one random state), stack the
     results along a 'split' dimension, and return the aggregate score.
     """
     random_state = self._initialize_random_state()
     repetition_indices = list(range(self._repetitions))
     repeated_scores = [func(random_state=random_state) for _ in repetition_indices]
     score = Score(repeated_scores, coords={'split': repetition_indices}, dims=['split'])
     self._save_matrix()
     return apply_aggregate(self.aggregate, score)
 def ceil_score(self, score, ceiling):
     """Normalize each raw per-split score by the square root of the
     corresponding per-split ceiling, then merge and aggregate.
     """
     assert set(score.raw['split'].values) == set(ceiling.raw['split'].values)
     ceiled_splits = []
     for split in ceiling.raw['split'].values:
         raw_split = score.raw.sel(split=split)
         ceiling_split = ceiling.raw.sel(split=split)
         normalized = raw_split / np.sqrt(ceiling_split)
         # re-attach the split coordinate lost by the .sel above
         normalized = normalized.expand_dims('split')
         normalized['split'] = [split]
         ceiled_splits.append(normalized)
     result = Score.merge(*ceiled_splits)
     result = apply_aggregate(self._metric.aggregate, result)
     result.attrs[Score.RAW_VALUES_KEY] = score  # this will override raw per-split ceiled scores which is ok
     result.attrs['ceiling'] = ceiling
     return result
def cross_correlation(prediction, target, cross, correlation):
    """Correlate prediction and target per value of the *cross* coordinate,
    then average the per-value scores over that coordinate.
    """
    assert (prediction[cross] == target[cross]).all()
    coord_names = [coord for coord, _, _ in walk_coords(target[cross])]
    per_value_scores = []
    for cross_value in target[cross].values:
        prediction_slice = prediction.sel(**{cross: cross_value})
        target_slice = target.sel(**{cross: cross_value})
        slice_score = correlation(prediction_slice, target_slice)
        # re-attach each component coordinate of the cross value
        for coord_name, value in zip(coord_names, cross_value):
            slice_score = slice_score.expand_dims(coord_name)
            slice_score[coord_name] = [value]
        slice_score = slice_score.stack(**{cross: coord_names})
        per_value_scores.append(slice_score)
    merged = merge_data_arrays(per_value_scores)
    return apply_aggregate(lambda s: s.mean(cross), merged)
Example #9
0
    def __call__(self, assembly, metric):
        """Leave-one-subject-out evaluation: score the pooled remaining
        subjects as a candidate for each held-out subject, then aggregate
        the per-subject scores (mean as center, std as error).
        """
        subjects = set(assembly[self.subject_column].values)
        scores = []
        iterate_subjects = self.get_subject_iterations(subjects)
        for heldout_subject in tqdm(iterate_subjects, desc='heldout subject'):
            try:
                subject_values = assembly[self.subject_column].values
                heldout_assembly = assembly[{'neuroid': [
                    value == heldout_subject for value in subject_values]}]
                # run subject pool as neural candidate
                pool = subjects - {heldout_subject}
                pool_assembly = assembly[{'neuroid': [
                    value in pool for value in subject_values]}]
                score = self.score(pool_assembly, heldout_assembly, metric=metric)
                # annotate with the held-out subject; only propagate to raw
                # scores if the column is not already part of them
                apply_raw = 'raw' in score.attrs and \
                            not hasattr(score.raw, self.subject_column)
                score = score.expand_dims(self.subject_column, _apply_raw=apply_raw)
                score.__setitem__(self.subject_column, [heldout_subject],
                                  _apply_raw=apply_raw)
                scores.append(score)
            except NoOverlapException as e:
                self._logger.debug(f"Ignoring no overlap {e}")
                continue  # ignore
            except ValueError as e:
                if "Found array with" in str(e):
                    self._logger.debug(f"Ignoring empty array {e}")
                    continue
                else:
                    raise e

        scores = Score.merge(*scores)
        error = scores.sel(aggregation='center').std(self.subject_column)
        scores = apply_aggregate(lambda s: s.mean(self.subject_column), scores)
        scores.loc[{'aggregation': 'error'}] = error
        return scores
Example #10
0
        def __call__(self, assembly, metric):
            """Split-half bootstrap: repeatedly split subjects into two
            random halves, score one half as a candidate for the other, and
            aggregate over bootstraps (mean as center, std as error).
            """
            subjects = set(assembly[self.subject_column].values)
            scores = []
            for bootstrap in tqdm(range(self._num_bootstraps),
                                  desc='split-half bootstrap'):
                try:
                    subject_values = assembly[self.subject_column].values
                    half1 = self._rng.choice(list(subjects),
                                             size=len(subjects) // 2,
                                             replace=False)
                    half2 = subjects - set(half1)
                    half1_assembly = assembly[{'neuroid': [
                        value in half1 for value in subject_values]}]
                    half2_assembly = assembly[{'neuroid': [
                        value in half2 for value in subject_values]}]
                    # half2 acts as a neural candidate predicting half1
                    score = self.score(half2_assembly, half1_assembly,
                                       metric=metric)
                    # annotate with the bootstrap index (aggregate only)
                    score = score.expand_dims("bootstrap", _apply_raw=False)
                    score.__setitem__("bootstrap", [bootstrap],
                                      _apply_raw=False)
                    scores.append(score)
                except NoOverlapException as e:
                    self._logger.debug(f"Ignoring no overlap ({e})")
                    continue  # ignore

            scores = Score.merge(*scores)
            error = scores.sel(aggregation='center').std("bootstrap")
            scores = apply_aggregate(lambda s: s.mean("bootstrap"), scores)
            scores.loc[{'aggregation': 'error'}] = error
            return scores
Example #11
0
 def post_process(self, scores):
     """Average scores over sub-experiments, then over experiments."""
     return apply_aggregate(
         lambda values: values.mean('sub_experiment').mean('experiment'),
         scores)