def __call__(self, candidate):
    # score the candidate model on each story separately
    scores = []
    for story, story_assembly in self._target_assemblies.items():
        source_assembly = candidate(stimuli=story_assembly.stimulus_set)
        score = self._cross_region(story_assembly,
                                   apply=lambda region_assembly: self._metric(source_assembly, region_assembly))
        score = score.expand_dims('story')
        score['story'] = [story]
        scores.append(score)
    score = Score.merge(*scores)
    # aggregate: average across stories, then across regions
    score = apply_aggregate(lambda score: score.mean('story'), score)
    score = apply_aggregate(lambda score: score.mean('region'), score)
    return score
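# A minimal standalone sketch of the expand-and-merge pattern used in __call__
# above, using plain xarray instead of the project's Score class and
# hypothetical story names/values: each per-story score gains a singleton
# 'story' dimension so that merged scores can then be averaged across stories.
import xarray as xr

per_story_scores = []
for story, value in [('story1', 0.4), ('story2', 0.6)]:  # hypothetical scores
    story_score = xr.DataArray(value)
    story_score = story_score.expand_dims('story')
    story_score['story'] = [story]
    per_story_scores.append(story_score)
merged = xr.concat(per_story_scores, dim='story')
print(merged.mean('story').item())  # -> 0.5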
def ceiling_normalize(self, score):
    raw_neuroids = apply_aggregate(lambda values: values.mean('split'), score.raw)
    score = ceil_neuroids(raw_neuroids, self.ceiling, subject_column='subject_UID')
    return score
def __call__(self, candidate):
    stimulus_set = self._target_assembly.attrs['stimulus_set']
    stimulus_set.loc[:, 'passage_id'] = stimulus_set['experiment'] + stimulus_set['passage_index'].astype(str)
    model_activations = listen_to(candidate, stimulus_set, reset_column='passage_id')
    assert set(model_activations['stimulus_id'].values) == set(self._target_assembly['stimulus_id'].values)

    _logger.info('Scoring across experiments & atlases')
    cross_scores = self._cross(self._target_assembly,
                               apply=lambda cross_assembly: self._apply_cross(model_activations, cross_assembly))
    raw_scores = cross_scores.raw
    raw_neuroids = apply_aggregate(lambda values: values.mean('split').mean('experiment'), raw_scores)

    # Normally we would ceil every single neuroid here. To estimate the strongest possible ceiling
    # (i.e. to make it as hard as possible on the models), we used experiment-overlapping neuroids
    # from as many subjects as possible, which means some neuroids were excluded. Since the ceiling
    # enters as a single aggregate value c, median(r / c) == median(r) / c, so we simply normalize
    # the neuroid aggregate by the overall ceiling aggregate.
    # Additionally, the Pereira data also contains voxels from DMN, visual, etc., but we only care
    # about language here, so we restrict scoring to the language atlas.
    language_neuroids = raw_neuroids.sel(atlas='language', _apply_raw=False)
    score = aggregate_ceiling(language_neuroids, ceiling=self.ceiling, subject_column='subject')
    return score
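# Numeric sanity check for the ceiling rationale above, with hypothetical
# values: when the ceiling enters as a single aggregate constant c,
# median(r / c) == median(r) / c, so normalizing the neuroid aggregate by the
# aggregate ceiling is equivalent to aggregating per-neuroid normalized scores.
import numpy as np

r = np.array([0.2, 0.4, 0.6])  # hypothetical per-neuroid raw scores
c = 0.8                        # hypothetical aggregate ceiling
assert np.isclose(np.median(r / c), np.median(r) / c)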
def ceiling_normalize(self, score):
    raw_neuroids = apply_aggregate(lambda values: values.mean('split'), score.raw)
    if not hasattr(raw_neuroids, 'neuroid_id'):
        # build a unique "subject.fROI" neuroid_id when none is present
        raw_neuroids['neuroid_id'] = 'neuroid', [".".join([str(value) for value in values]) for values in zip(*[
            raw_neuroids[coord].values for coord in ['subject_UID', 'fROI_area']])]
    score = ceil_neuroids(raw_neuroids, self.ceiling, subject_column='subject_UID')
    return score
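# Sketch of the neuroid_id construction above on plain lists, with
# hypothetical subject/fROI values: each neuroid gets a unique "subject.fROI"
# identifier so that scores and ceilings can be aligned per neuroid.
subject_UID = ['018', '018', '023']
fROI_area = ['lang_LH_IFG', 'lang_LH_MFG', 'lang_LH_IFG']
neuroid_id = [".".join(str(value) for value in values) for values in zip(subject_UID, fROI_area)]
print(neuroid_id)  # -> ['018.lang_LH_IFG', '018.lang_LH_MFG', '023.lang_LH_IFG']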
def fROI_correlation():
    assembly = load_voxels()
    stories = list(sorted(set(assembly['story'].values)))
    subjects = list(sorted(set(assembly['subject_UID'].values)))
    split_scores = []
    correlate = pearsonr_correlation(xarray_kwargs=dict(correlation_coord='stimulus_id', neuroid_coord='fROI_area'))
    cross_stories_subjects = list(itertools.product(stories, subjects))
    for story, heldout_subject in tqdm(cross_stories_subjects, desc='cross-{story,subject}'):
        story_assembly = assembly[{'presentation': [coord_story == story for coord_story in assembly['story'].values]}]
        subject_pool = story_assembly[{'neuroid': [subject != heldout_subject
                                                   for subject in story_assembly['subject_UID'].values]}]
        subject_pool = average_subregions(subject_pool)
        heldout = story_assembly[{'neuroid': [subject == heldout_subject
                                              for subject in story_assembly['subject_UID'].values]}]
        heldout = average_subregions(heldout)
        split_score = correlate(subject_pool, heldout)
        # drop subject-specific coords so scores from different held-out subjects can be merged
        split_score = type(split_score)(split_score.values, coords={
            coord: (dims, values) for coord, dims, values in walk_coords(split_score)
            if not coord.startswith('subject_') and coord != 'neuroid_id'}, dims=split_score.dims)
        split_score = split_score.expand_dims('heldout_subject').expand_dims('story')
        split_score['heldout_subject'], split_score['story'] = [heldout_subject], [story]
        split_scores.append(split_score)
    correlation = Score.merge(*split_scores)
    correlation = apply_aggregate(lambda scores: scores.mean('neuroid').mean('story'), correlation)
    center = correlation.mean('heldout_subject')
    error = correlation.std('heldout_subject')
    score = Score([center, error],
                  coords={**{'aggregation': ['center', 'error']},
                          **{coord: (dims, values) for coord, dims, values in walk_coords(center)}},
                  dims=('aggregation',) + center.dims)
    score.attrs[Score.RAW_VALUES_KEY] = correlation.attrs[Score.RAW_VALUES_KEY]
    return score
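# Minimal sketch of the center/error aggregation at the end of
# fROI_correlation, using plain xarray and hypothetical correlation values:
# the mean over held-out subjects becomes the score's center and their
# standard deviation its error.
import xarray as xr

correlation = xr.DataArray([0.25, 0.75, 0.5],
                           coords={'heldout_subject': ['s1', 's2', 's3']},
                           dims=['heldout_subject'])
center = correlation.mean('heldout_subject')
error = correlation.std('heldout_subject')
aggregated = xr.concat([center, error], dim='aggregation')
aggregated['aggregation'] = ['center', 'error']
print(aggregated.sel(aggregation='center').item())  # -> 0.5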
def _repeat(self, func):
    random_state = self._initialize_random_state()
    repetitions = list(range(self._repetitions))
    scores = [func(random_state=random_state) for _ in repetitions]
    score = Score(scores, coords={'split': repetitions}, dims=['split'])
    self._save_matrix()
    return apply_aggregate(self.aggregate, score)
def ceil_score(self, score, ceiling):
    assert set(score.raw['split'].values) == set(ceiling.raw['split'].values)
    split_scores = []
    for split in ceiling.raw['split'].values:
        split_score = score.raw.sel(split=split)
        split_ceiling = ceiling.raw.sel(split=split)
        ceiled_split_score = split_score / np.sqrt(split_ceiling)
        ceiled_split_score = ceiled_split_score.expand_dims('split')
        ceiled_split_score['split'] = [split]
        split_scores.append(ceiled_split_score)
    split_scores = Score.merge(*split_scores)
    split_scores = apply_aggregate(self._metric.aggregate, split_scores)
    split_scores.attrs[Score.RAW_VALUES_KEY] = score  # this will override raw per-split ceiled scores, which is ok
    split_scores.attrs['ceiling'] = ceiling
    return split_scores
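# Worked example of the per-split normalization above, with hypothetical
# numbers: a raw split score of 0.3 against a split ceiling of 0.36 yields
# 0.3 / sqrt(0.36) = 0.5.
import numpy as np

raw_split_score, split_ceiling = 0.3, 0.36
print(raw_split_score / np.sqrt(split_ceiling))  # -> 0.5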
def cross_correlation(prediction, target, cross, correlation):
    assert (prediction[cross] == target[cross]).all()
    scores = []
    coords = [coord for coord, dims, values in walk_coords(target[cross])]
    for cross_value in target[cross].values:
        _prediction = prediction.sel(**{cross: cross_value})
        _target = target.sel(**{cross: cross_value})
        score = correlation(_prediction, _target)
        # re-attach the cross coordinates that .sel dropped, then restore the stacked dim
        for coord, coord_value in zip(coords, cross_value):
            score = score.expand_dims(coord)
            score[coord] = [coord_value]
        score = score.stack(**{cross: coords})
        scores.append(score)
    score = merge_data_arrays(scores)
    score = apply_aggregate(lambda score: score.mean(cross), score)
    return score
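# Minimal sketch of iterating a stacked "cross" coordinate as in
# cross_correlation above, with hypothetical data: the stacked MultiIndex
# values are tuples that can be passed to .sel directly.
import numpy as np
import xarray as xr

data = xr.DataArray(np.arange(4.0).reshape(2, 2),
                    coords={'experiment': ['exp1', 'exp2'], 'subject': ['s1', 's2']},
                    dims=['experiment', 'subject'])
stacked = data.stack(cross=['experiment', 'subject'])
for cross_value in stacked['cross'].values:  # e.g. ('exp1', 's1')
    print(cross_value, stacked.sel(cross=cross_value).item())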
def __call__(self, assembly, metric):
    subjects = set(assembly[self.subject_column].values)
    scores = []
    iterate_subjects = self.get_subject_iterations(subjects)
    for subject in tqdm(iterate_subjects, desc='heldout subject'):
        try:
            subject_assembly = assembly[{'neuroid': [subject_value == subject
                                                     for subject_value in assembly[self.subject_column].values]}]
            # run the remaining subject pool as the neural candidate for the held-out subject
            subject_pool = subjects - {subject}
            pool_assembly = assembly[{'neuroid': [subject_value in subject_pool
                                                  for subject_value in assembly[self.subject_column].values]}]
            score = self.score(pool_assembly, subject_assembly, metric=metric)
            # store scores; only propagate the subject column if it is not already part of the score
            apply_raw = 'raw' in score.attrs and not hasattr(score.raw, self.subject_column)
            score = score.expand_dims(self.subject_column, _apply_raw=apply_raw)
            score.__setitem__(self.subject_column, [subject], _apply_raw=apply_raw)
            scores.append(score)
        except NoOverlapException as e:
            self._logger.debug(f"Ignoring no overlap {e}")
            continue  # ignore
        except ValueError as e:
            if "Found array with" in str(e):
                self._logger.debug(f"Ignoring empty array {e}")
                continue
            else:
                raise e
    scores = Score.merge(*scores)
    error = scores.sel(aggregation='center').std(self.subject_column)
    scores = apply_aggregate(lambda scores: scores.mean(self.subject_column), scores)
    scores.loc[{'aggregation': 'error'}] = error
    return scores
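# Sketch of the leave-one-subject-out partition driving the loop above, with
# hypothetical subject identifiers: each subject in turn is held out and
# predicted from the pool of all remaining subjects.
subjects = {'018', '023', '199'}
for heldout_subject in sorted(subjects):
    subject_pool = subjects - {heldout_subject}
    print(heldout_subject, '<-', sorted(subject_pool))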
def __call__(self, assembly, metric):
    subjects = set(assembly[self.subject_column].values)
    scores = []
    for bootstrap in tqdm(range(self._num_bootstraps), desc='split-half bootstrap'):
        try:
            # sort for a deterministic draw order across runs (set iteration order is not stable)
            half1 = self._rng.choice(sorted(subjects), size=len(subjects) // 2, replace=False)
            half2 = subjects - set(half1)
            half1_assembly = assembly[{'neuroid': [subject_value in half1
                                                   for subject_value in assembly[self.subject_column].values]}]
            half2_assembly = assembly[{'neuroid': [subject_value in half2
                                                   for subject_value in assembly[self.subject_column].values]}]
            # run half2 as the neural candidate for half1
            score = self.score(half2_assembly, half1_assembly, metric=metric)
            # store scores
            score = score.expand_dims("bootstrap", _apply_raw=False)
            score.__setitem__("bootstrap", [bootstrap], _apply_raw=False)
            scores.append(score)
        except NoOverlapException as e:
            self._logger.debug(f"Ignoring no overlap ({e})")
            continue  # ignore
    scores = Score.merge(*scores)
    error = scores.sel(aggregation='center').std("bootstrap")
    scores = apply_aggregate(lambda scores: scores.mean("bootstrap"), scores)
    scores.loc[{'aggregation': 'error'}] = error
    return scores
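# Sketch of a single seeded split-half draw as used above, with hypothetical
# subject identifiers: one half of the subjects serves as the candidate that
# predicts the other half.
import numpy as np

rng = np.random.RandomState(0)
subjects = {'018', '023', '199', '215'}
half1 = set(rng.choice(sorted(subjects), size=len(subjects) // 2, replace=False))
half2 = subjects - half1
print(sorted(half1), '->', sorted(half2))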
def post_process(self, scores):
    scores = apply_aggregate(lambda values: values.mean('sub_experiment').mean('experiment'), scores)
    return scores