def __call__(self, assembly, *args, **kwargs):
     result = Score([assembly.values[0]], dims=['dim'])
     raw = Score(result.copy(),
                 coords={
                     'dim_id': ('dim', [assembly.values[1]]),
                     'division_coord': ('dim', [assembly.values[2]])
                 })
     result.attrs['raw'] = raw
     return result
Example #2
 def __init__(self, noise_type, parent_category):
     self._noise_type = noise_type
     ceiling = Score([1, np.nan], coords={'aggregation': ['center', 'error']}, dims=['aggregation'])
     super(Imagenet_C_Group, self).__init__(identifier=f'dietterich.Hendrycks2019-{noise_type}-top1', version=1,
                                            ceiling_func=lambda: ceiling,
                                            parent=f'dietterich.Hendrycks2019-{parent_category}-top1',
                                            bibtex=BIBTEX)
Example #3
def fROI_correlation():
    assembly = load_voxels()

    stories = list(sorted(set(assembly['story'].values)))
    subjects = list(sorted(set(assembly['subject_UID'].values)))
    split_scores = []
    correlate = pearsonr_correlation(xarray_kwargs=dict(correlation_coord='stimulus_id', neuroid_coord='fROI_area'))
    cross_stories_subjects = list(itertools.product(stories, subjects))
    for story, heldout_subject in tqdm(cross_stories_subjects, desc='cross-{story,subject}'):
        story_assembly = assembly[{'presentation': [coord_story == story for coord_story in assembly['story'].values]}]
        subject_pool = story_assembly[{'neuroid': [subject != heldout_subject
                                                   for subject in story_assembly['subject_UID'].values]}]
        subject_pool = average_subregions(subject_pool)
        heldout = story_assembly[{'neuroid': [subject == heldout_subject
                                              for subject in story_assembly['subject_UID'].values]}]
        heldout = average_subregions(heldout)
        split_score = correlate(subject_pool, heldout)
        split_score = type(split_score)(split_score.values, coords={
            coord: (dims, values) for coord, dims, values in walk_coords(split_score)
            if not coord.startswith('subject_') and coord != 'neuroid_id'}, dims=split_score.dims)

        split_score = split_score.expand_dims('heldout_subject').expand_dims('story')
        split_score['heldout_subject'], split_score['story'] = [heldout_subject], [story]
        split_scores.append(split_score)
    correlation = Score.merge(*split_scores)

    correlation = apply_aggregate(lambda scores: scores.mean('neuroid').mean('story'), correlation)
    center = correlation.mean('heldout_subject')
    error = correlation.std('heldout_subject')
    score = Score([center, error], coords={**{'aggregation': ['center', 'error']},
                                           **{coord: (dims, values) for coord, dims, values in walk_coords(center)}},
                  dims=('aggregation',) + center.dims)
    score.attrs[Score.RAW_VALUES_KEY] = correlation.attrs[Score.RAW_VALUES_KEY]
    return score
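The per-story/per-subject split scores above are combined with Score.merge. A minimal sketch of that merging pattern with invented values and coordinates (assuming Score is importable from brainscore.metrics, as referenced elsewhere on this page):

from brainscore.metrics import Score

split_a = Score([.5], coords={'story': ['story1']}, dims=['story'])
split_b = Score([.7], coords={'story': ['story2']}, dims=['story'])
merged = Score.merge(split_a, split_b)  # one Score spanning both stories along the 'story' dimension
print(merged.values)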
 def __call__(self, prediction, target):
     # align
     prediction = prediction.sortby(
         [self._correlation_coord, self._neuroid_coord])
     target = target.sortby([self._correlation_coord, self._neuroid_coord])
     assert np.array(prediction[self._correlation_coord].values == target[
         self._correlation_coord].values).all()
     assert np.array(prediction[self._neuroid_coord].values == target[
         self._neuroid_coord].values).all()
     # compute correlation per neuroid
     neuroid_dims = target[self._neuroid_coord].dims
     assert len(neuroid_dims) == 1
     correlations = []
     for i, coord_value in enumerate(target[self._neuroid_coord].values):
         target_neuroids = target.isel(**{
             neuroid_dims[0]: i
         })  # `isel` is about 10x faster than `sel`
         prediction_neuroids = prediction.isel(**{neuroid_dims[0]: i})
         r, p = self._correlation(target_neuroids, prediction_neuroids)
         correlations.append(r)
     # package
     result = Score(correlations,
                    coords={
                        coord: (dims, values)
                        for coord, dims, values in walk_coords(target)
                        if dims == neuroid_dims
                    },
                    dims=neuroid_dims)
     return result
 def test_sel(self):
     score = Score([1, 2], coords={'a': [1, 2]}, dims=['a'])
     score.attrs['raw'] = DataAssembly([0, 2, 1, 3],
                                       coords={'a': [1, 1, 2, 2]},
                                       dims=['a'])
     sel_score = score.sel(a=1)
     np.testing.assert_array_equal(sel_score.raw['a'], [1, 1])
 def test_mean_apply_raw(self):
     score = Score([1, 2], coords={'a': [1, 2]}, dims=['a'])
     score.attrs['raw'] = DataAssembly([0, 2, 1, 3],
                                       coords={'a': [1, 1, 2, 2]},
                                       dims=['a'])
     mean_score = score.mean('a', _apply_raw=True)
     assert mean_score.raw == 1.5
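The tests above exercise how a Score keeps its unaggregated values in the 'raw' attribute. A minimal sketch of that pattern with toy numbers (import paths are assumed from the module references in the surrounding snippets, e.g. brainscore.metrics and brainio_base.assemblies):

from brainscore.metrics import Score
from brainio_base.assemblies import DataAssembly

score = Score([0.42, 0.05],
              coords={'aggregation': ['center', 'error']},
              dims=['aggregation'])
# keep the unaggregated per-split values alongside the aggregate
score.attrs['raw'] = DataAssembly([0.40, 0.44, 0.39, 0.45],
                                  coords={'split': [0, 1, 2, 3]},
                                  dims=['split'])
center = float(score.sel(aggregation='center'))
print(center, score.raw.values)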
Example #7
    def __init__(self):
        ceiling = Score([1, np.nan],
                        coords={'aggregation': ['center', 'error']},
                        dims=['aggregation'])
        assembly_repetition = get_assembly()
        assert len(np.unique(assembly_repetition['region'])) == 1
        assert hasattr(assembly_repetition, 'repetition')
        self.region = 'IT'
        self.assembly = average_repetition(assembly_repetition)
        self._assembly = self.assembly
        self.timebins = timebins_from_assembly(self.assembly)

        self._similarity_metric = CrossRegressedCorrelation(
            regression=pls_regression(),
            correlation=pearsonr_correlation(),
            crossvalidation_kwargs=dict(
                stratification_coord=Split.Defaults.stratification_coord
                if hasattr(self.assembly, Split.Defaults.stratification_coord
                           ) else None))
        identifier = f'{assembly_repetition.name}-layer_selection'
        ceiler = InternalConsistency()
        super(_MockBenchmark,
              self).__init__(identifier=identifier,
                             ceiling_func=lambda: ceiler(assembly_repetition),
                             version='1.0')
Example #8
 def __init__(self):
     stimulus_set = pd.read_csv(
         os.path.join(os.path.dirname(__file__), 'imagenet2012.csv'))
     stimulus_set = StimulusSet(stimulus_set)
     stimulus_set.image_paths = {
         row.image_id: row.filepath
         for row in stimulus_set.itertuples()
     }
     self._stimulus_set = stimulus_set
     self._similarity_metric = Accuracy()
     ceiling = Score([1, np.nan],
                     coords={'aggregation': ['center', 'error']},
                     dims=['aggregation'])
     super(Imagenet2012, self).__init__(identifier='fei-fei.Deng2009-top1',
                                        version=1,
                                        ceiling_func=lambda: ceiling,
                                        parent='ImageNet',
                                        bibtex="""@INPROCEEDINGS{5206848,  
                                             author={J. {Deng} and W. {Dong} and R. {Socher} and L. {Li} and  {Kai Li} and  {Li Fei-Fei}},  
                                             booktitle={2009 IEEE Conference on Computer Vision and Pattern Recognition},   
                                             title={ImageNet: A large-scale hierarchical image database},   
                                             year={2009},  
                                             volume={},  
                                             number={},  
                                             pages={248-255},
                                             url = {https://ieeexplore.ieee.org/document/5206848}
                                         }""")
Example #9
 def __call__(self, candidate):
     self._logger.info('Computing activations')
     model_activations = read_words(
         candidate,
         self._target_assembly.attrs['stimulus_set'],
         reset_column='story_id',
         copy_columns=('stimulus_id', 'word_id', 'sentence_id'))
     assert set(model_activations['stimulus_id'].values) == set(
         self._target_assembly['stimulus_id'].values)
     self._logger.info('Scoring model')
     cross_subject_scores = self._cross_subject(
         self._target_assembly,
         apply=lambda cross_assembly: self._apply_within_subject(
             model_activations, cross_assembly))
     # normalize by ceiling
     # Note that we normalize by an overall ceiling, so the scores per subject are not normalized wrt. that subject
     # and should thus not be used by themselves. Only the aggregate makes sense to report
     normalized_subject_scores = consistency(
         cross_subject_scores.sel(aggregation='center'),
         self.ceiling.sel(aggregation='center'))
     score = normalized_subject_scores.median('subject_id')
     std = normalized_subject_scores.std('subject_id')
     std['aggregation'] = 'error'
     # the MultiIndex tends to mess things up, so we get rid of it here
     score, std = xr.DataArray(score).expand_dims(
         'aggregation'), xr.DataArray(std).expand_dims('aggregation')
     score = Score(Score.merge(score, std))
     score.attrs['raw'] = cross_subject_scores
     score.attrs['ceiling'] = self.ceiling
     return score
Example #10
 def _apply_cross(self, source_assembly, cross_assembly):
     # some subjects have only done one experiment which leads to nans
     cross_assembly = cross_assembly.dropna('neuroid')
     if len(cross_assembly['neuroid']) == 0:
         return Score([np.nan, np.nan], coords={'aggregation': ['center', 'error']}, dims=['aggregation'])
     return super(_PereiraSubjectWise, self)._apply_cross(
         source_assembly=source_assembly, cross_assembly=cross_assembly)
 def test_mean(self):
     score = Score([1, 2], coords={'a': [1, 2]}, dims=['a'])
     score.attrs['raw'] = DataAssembly([0, 2, 1, 3],
                                       coords={'a': [1, 1, 2, 2]},
                                       dims=['a'])
     mean_score = score.mean('a')
     np.testing.assert_array_equal(mean_score.raw['a'], [1, 1, 2, 2])
Example #12
    def __init__(self):
        ceiling = Score(
            [.79, np.nan],  # following private conversation with Kohitij Kar
            coords={'aggregation': ['center', 'error']},
            dims=['aggregation'])
        super(DicarloKar2019OST, self).__init__(
            identifier='dicarlo.Kar2019-ost',
            version=2,
            ceiling_func=lambda: ceiling,
            parent='IT-temporal',
            paper_link='https://www.nature.com/articles/s41593-019-0392-5')
        assembly = brainscore.get_assembly('dicarlo.Kar2019')
        # drop duplicate images
        _, index = np.unique(assembly['image_id'], return_index=True)
        assembly = assembly.isel(presentation=index)
        assembly.attrs['stimulus_set'] = assembly.stimulus_set.drop_duplicates(
            'image_id')

        assembly = assembly.sel(decoder='svm')

        self._assembly = assembly
        self._assembly['truth'] = self._assembly['image_label']
        self._assembly.stimulus_set['truth'] = self._assembly.stimulus_set[
            'image_label']

        self._similarity_metric = OSTCorrelation()
        self._visual_degrees = VISUAL_DEGREES
        self._number_of_trials = 44
Example #13
 def __call__(self, candidate: BrainModel):
     candidate.start_recording('IT', time_bins=self._time_bins)
     stimulus_set = place_on_screen(
         self._assembly.stimulus_set,
         target_visual_degrees=candidate.visual_degrees(),
         source_visual_degrees=self._visual_degrees)
     # Temporal recordings from large candidates take up a lot of memory and compute time.
     # In order to quickly reject recordings that are static over time,
     # we will show one image and check whether the recordings vary over time at all or not.
     # If they don't we can quickly score the candidate with a failure state
     # since it will not be able to predict temporal differences with the OST metric
     check_stimulus_set = stimulus_set[:1]
     check_stimulus_set.identifier = None  # unset identifier to avoid storing (interferes with actual stimulus_set)
     check_recordings = candidate.look_at(
         check_stimulus_set, number_of_trials=self._number_of_trials)
     if not temporally_varying(check_recordings):
         score = Score([np.nan, np.nan],
                       coords={'aggregation': ['center', 'error']},
                       dims=['aggregation'])
     else:
         recordings = candidate.look_at(
             stimulus_set, number_of_trials=self._number_of_trials)
         score = self._similarity_metric(recordings, self._assembly)
     score = ceil_score(score, self.ceiling)
     return score
Example #14
 def run_evaluation(return_score=False):
     scores = []
     # Loop to handle MNLI double evaluation (matched, mis-matched)
     for eval_task in eval_task_names:
         examples, label_list, output_mode = get_examples(
             data_dir=data_dir, task=eval_task, evaluate=True)
         eval_dataset = model.glue_dataset(
             task=eval_task,
             examples=examples,
             label_list=label_list,
             output_mode=output_mode,
             max_seq_length=max_seq_length)
         result = evaluate(features_model=model,
                           decoder_head=decoder_head,
                           eval_dataset=eval_dataset,
                           task_name=eval_task,
                           output_mode=output_mode,
                           device=device)
         if not return_score:
             return result  # we're ignoring mnli-mm here, but this return is just for progress logging anyway
         score = Score([[value for key, value in result.items()]],
                       coords={
                           'eval_task': [eval_task],
                           'measure': list(result.keys())
                       },
                       dims=['eval_task', 'measure'])
         score.attrs['data_dir'] = data_dir
         score.attrs['benchmark_identifier'] = f"glue-{self.task_name}"
         score.attrs['eval_task'] = eval_task
         score.attrs['model_identifier'] = model.identifier
         scores.append(score)
     scores = Score.merge(*scores)
     return scores
Example #15
 def _repeat(self, func):
     random_state = self._initialize_random_state()
     repetitions = list(range(self._repetitions))
     scores = [func(random_state=random_state) for repetition in repetitions]
     score = Score(scores, coords={'split': repetitions}, dims=['split'])
     self._save_matrix()
     return apply_aggregate(self.aggregate, score)
Example #16
 def correlate(self, predicted_osts, target_osts):
     non_nan = np.logical_and(~np.isnan(predicted_osts),
                              ~np.isnan(target_osts))
     predicted_osts, target_osts = predicted_osts[non_nan], target_osts[
         non_nan]
     # use Spearman over Pearson since it tests whether the rank orders are the same,
     # which allows for nonlinear correlates whereas Pearson assumes linearity.
     correlation, p = spearmanr(predicted_osts, target_osts)
     return Score(correlation)
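The comment above motivates Spearman over Pearson. A standalone illustration with plain scipy (not part of the metric code, values chosen arbitrarily): a monotonic but nonlinear relation keeps a perfect rank correlation while the linear correlation drops.

import numpy as np
from scipy.stats import pearsonr, spearmanr

x = np.linspace(1, 10, 50)
y = x ** 3  # monotonic but nonlinear
print(spearmanr(x, y).correlation)  # 1.0, identical rank order
print(pearsonr(x, y)[0])            # < 1.0, penalized for nonlinearity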
Example #17
    def __call__(self, model: TaskModel):
        model.mode = TaskModel.Modes.tokens_to_features
        set_seed(self.seed)
        device = 'cuda' if torch.cuda.is_available() else 'cpu'
        logger.debug(
            f"Using block size {self.block_size} for {model.identifier}")

        # Data
        vocab_size = min(model.vocab_size, 250000)
        train_tokens = TextDataset(model_identifier=model.identifier,
                                   model=model,
                                   block_size=self.block_size,
                                   vocab_size=vocab_size,
                                   file_path=self.train_data_file)
        val_tokens = TextDataset(model_identifier=model.identifier,
                                 model=model,
                                 block_size=self.block_size,
                                 vocab_size=vocab_size,
                                 file_path=self.val_data_file)
        test_tokens = TextDataset(model_identifier=model.identifier,
                                  model=model,
                                  block_size=self.block_size,
                                  vocab_size=vocab_size,
                                  file_path=self.eval_data_file)

        # Decoder
        logger.info(f"Vocab size: {vocab_size}")
        features_sample, _ = train_tokens[0]
        lm_head = LMHeadModel(features_size=features_sample.shape[-1],
                              vocab_size=vocab_size,
                              embedding_weights=model.get_embedding_weights()
                              if self.tied else None)
        lm_head = lm_head.to(device)

        # Train
        train(model=lm_head,
              train_dataset=train_tokens,
              val_dataset=val_tokens,
              device=device,
              seed=self.seed,
              **self.kwargs)

        # Evaluation
        test_result = evaluate(model=lm_head,
                               eval_dataset=test_tokens,
                               device=device)
        score = Score([test_result[key] for key in ['perplexity', 'loss']],
                      coords={'measure': ['test_perplexity', 'test_loss']},
                      dims=['measure'])
        score.attrs['datasets'] = {
            'train': self.train_data_file,
            'val': self.val_data_file,
            'test': self.eval_data_file
        }
        score.attrs['benchmark_identifier'] = self.identifier
        score.attrs['model_identifier'] = model.identifier
        return score
Example #18
 def __call__(self, source_recordings, target_osts):
     if len(set(source_recordings['time_bin'].values)) <= 1:  # short-cut for non-temporal models
         return Score([np.nan, np.nan],
                      coords={'aggregation': ['center', 'error']},
                      dims=['aggregation'])
     score = self._cross_validation(source_recordings,
                                    target_osts,
                                    apply=self.apply)
     return score
Example #19
    def __call__(self, source, target):
        values = source == target
        center = np.mean(values)
        error = np.std(values)

        score = Score([center, error],
                      coords={'aggregation': ['center', 'error']},
                      dims=('aggregation', ))
        score.attrs[Score.RAW_VALUES_KEY] = values
        return score
Example #20
def aggregate_neuroid_scores(neuroid_scores, subject_column):
    subject_scores = neuroid_scores.groupby(subject_column).median()
    center = subject_scores.median(subject_column)
    subject_values = np.nan_to_num(subject_scores.values, nan=0)  # mad cannot deal with all-nan in one axis, treat as 0
    subject_axis = subject_scores.dims.index(subject_scores[subject_column].dims[0])
    error = median_absolute_deviation(subject_values, axis=subject_axis)
    score = Score([center, error], coords={'aggregation': ['center', 'error']}, dims=['aggregation'])
    score.attrs['raw'] = neuroid_scores
    score.attrs['description'] = "score aggregated by taking median of neuroids per subject, " \
                                 "then median of subject scores"
    return score
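A hypothetical usage sketch for the helper above, with made-up per-neuroid scores and subject labels (the names, values and the Score import path are illustrative assumptions; the function above and its scipy dependency are assumed to be in scope):

from brainscore.metrics import Score

neuroid_scores = Score([.60, .70, .40, .50],
                       coords={'neuroid_id': ('neuroid', [0, 1, 2, 3]),
                               'subject': ('neuroid', ['A', 'A', 'B', 'B'])},
                       dims=['neuroid'])
aggregate = aggregate_neuroid_scores(neuroid_scores, subject_column='subject')
# center = median of per-subject medians = median([.65, .45]) = .55
print(aggregate.sel(aggregation='center').values)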
Example #21
    def extrapolate_neuroid(self, ceilings):
        # figure out how many extrapolation x points we have. E.g. for Pereira, not all combinations are possible
        subject_subsamples = list(sorted(set(ceilings['num_subjects'].values)))
        rng = RandomState(0)
        bootstrap_params = []
        for bootstrap in range(self.num_bootstraps):
            bootstrapped_scores = []
            for num_subjects in subject_subsamples:
                num_scores = ceilings.sel(num_subjects=num_subjects)
                # the sub_subjects dimension creates nans, get rid of those
                num_scores = num_scores.dropna(f'sub_{self.subject_column}')
                assert set(num_scores.dims) == {f'sub_{self.subject_column}', 'split'} or \
                       set(num_scores.dims) == {f'sub_{self.subject_column}'}
                # choose from subject subsets and the splits therein, with replacement for variance
                choices = num_scores.values.flatten()
                bootstrapped_score = rng.choice(choices,
                                                size=len(choices),
                                                replace=True)
                bootstrapped_scores.append(np.mean(bootstrapped_score))

            try:
                params = self.fit(subject_subsamples, bootstrapped_scores)
            except RuntimeError:  # optimal parameters not found
                params = [np.nan, np.nan]
            params = DataAssembly([params],
                                  coords={
                                      'bootstrap': [bootstrap],
                                      'param': ['v0', 'tau0']
                                  },
                                  dims=['bootstrap', 'param'])
            bootstrap_params.append(params)
        bootstrap_params = merge_data_arrays(bootstrap_params)
        # find endpoint and error
        asymptote_threshold = .0005
        interpolation_xs = np.arange(1000)
        ys = np.array([
            v(interpolation_xs, *params) for params in bootstrap_params.values
            if not np.isnan(params).any()
        ])
        median_ys = np.median(ys, axis=0)
        diffs = np.diff(median_ys)
        end_x = np.where(diffs < asymptote_threshold)[0].min()  # first x where increase smaller than threshold
        # put together
        center = np.median(np.array(bootstrap_params)[:, 0])
        error = ci_error(ys[:, end_x], center=center)
        score = Score(
            [center] + list(error),
            coords={'aggregation': ['center', 'error_low', 'error_high']},
            dims=['aggregation'])
        score.attrs['raw'] = ceilings
        score.attrs['bootstrapped_params'] = bootstrap_params
        score.attrs['endpoint_x'] = DataAssembly(end_x)
        return score
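The fit function v used above is not defined in this excerpt; in ceiling-extrapolation code of this kind it is typically a saturating exponential in the number of subjects, roughly as sketched below (an assumption, not taken from the snippet). self.fit would then estimate v0 (the extrapolated ceiling) and tau0 from the bootstrapped scores, e.g. with scipy.optimize.curve_fit.

import numpy as np

def v(x, v0, tau0):
    # approaches the asymptote v0 as the number of subjects x grows
    return v0 * (1 - np.exp(-x / tau0))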
Example #22
    def __call__(self, assembly1, assembly2):
        """
        :param brainio_base.assemblies.NeuroidAssembly assembly1:
        :param brainio_base.assemblies.NeuroidAssembly assembly2:
        :return: brainscore.metrics.Score
        """

        rdm1 = self._rdm(assembly1)
        rdm2 = self._rdm(assembly2)
        similarity = self._similarity(rdm1, rdm2)
        return Score(similarity)
Example #23
 def __call__(self, candidate):
     scores = xr.concat([
         Imagenet_C_Group(group, parent_category=self._category)(candidate)
         for group in self._groups
     ], dim='presentation')
     assert len(set(scores['noise_type'].values)) == len(self._groups)
     center = np.mean(scores)
     error = np.std(scores)
     score = Score([center, error], coords={'aggregation': ['center', 'error']}, dims=('aggregation',))
     score.attrs[Score.RAW_VALUES_KEY] = scores
     return score
Example #24
 def __init__(self, identifier_suffix, noise_type):
     identifier = f'dietterich.Hendrycks2019.{identifier_suffix}'
     stimulus_set = brainscore.get_stimulus_set(identifier)
     self._stimulus_set = stimulus_set
     self._similarity_metric = Accuracy()
     self._benchmark_name = identifier
     self._noise_type = noise_type
     ceiling = Score([1, np.nan], coords={'aggregation': ['center', 'error']}, dims=['aggregation'])
     super(Imagenet_C_Individual, self).__init__(identifier=f"{identifier}-top1", version=1,
                                                 ceiling_func=lambda: ceiling,
                                                 parent=f'dietterich.Hendrycks2019-{noise_type}-top1',
                                                 bibtex=BIBTEX)
Example #25
 def __init__(self, category):
     category_groups = {
         'noise': ['gaussian_noise', 'shot_noise', 'impulse_noise'],
         'blur': ['glass_blur', 'motion_blur', 'zoom_blur', 'defocus_blur'],
         'weather': ['snow', 'frost', 'fog', 'brightness'],
         'digital': ['pixelate', 'contrast', 'elastic_transform', 'jpeg_compression']
     }
     self._category = category
     self._groups = category_groups[category]
     ceiling = Score([1, np.nan], coords={'aggregation': ['center', 'error']}, dims=['aggregation'])
     super(Imagenet_C_Category, self).__init__(identifier=f'dietterich.Hendrycks2019-{category}-top1', version=1,
                                               ceiling_func=lambda: ceiling,
                                               parent='dietterich.Hendrycks2019-top1',
                                               bibtex=BIBTEX)
 def test_squeeze(self):
     score = Score([[1, 2]],
                   coords={
                       's': [0],
                       'a': [1, 2]
                   },
                   dims=['s', 'a'])
     score.attrs['raw'] = DataAssembly([[0, 2, 1, 3]],
                                       coords={
                                           's': [0],
                                           'a': [1, 1, 2, 2]
                                       },
                                       dims=['s', 'a'])
     sel_score = score.squeeze('s')
     np.testing.assert_array_equal(sel_score.raw.dims, ['a'])
 def aggregate(cls, values):
     center = values.mean('split')
     error = standard_error_of_the_mean(values, 'split')
     return Score(
         [center, error],
         coords={
             **{
                 'aggregation': ['center', 'error']
             },
             **{
                 coord: (dims, values)
                 for coord, dims, values in walk_coords(center)
             }
         },
         dims=('aggregation', ) + center.dims)
Example #28
 def __init__(self, stimulus_set, noise_type, noise_category):
     self.stimulus_set = stimulus_set[stimulus_set['noise_type'] ==
                                      noise_type]
     self.noise_type = noise_type
     self.noise_category = noise_category
     ceiling = Score([1, np.nan],
                     coords={'aggregation': ['center', 'error']},
                     dims=['aggregation'])
     super(Imagenet_C_Type, self).__init__(
         identifier=
         f'dietterich.Hendrycks2019-{noise_category}-{noise_type}-top1',
         version=2,
         ceiling_func=lambda: ceiling,
         parent=f'dietterich.Hendrycks2019-{noise_category}-top1',
         bibtex=BIBTEX)
Example #29
    def __call__(self, candidate: BrainModel):
        self._metric = ScanMatchPy.initialize()

        self._logger.info("## Starting visual search task...")
        candidate.start_task(BrainModel.Task.visual_search,
                             max_fix=self.max_fix,
                             data_len=self.data_len,
                             ior_size=self.ior_size)
        self.cumm_perf, self.saccades = candidate.look_at(self._stimuli)
        # in saccades the last index denotes the index at which the target was found
        fix_model = self.saccades[:, :self.max_fix + 1, :]  # first n saccades
        I_fix_model = self.saccades[:, self.max_fix + 1, :1]  # index at which the target was found
        fix1 = matlab.int32(fix_model.tolist())
        I_fix1 = matlab.int32(I_fix_model.tolist())
        self._logger.info("## Search task done...\n")

        self._logger.info("## Calculating score...")
        scores = []
        for sub_id in tqdm(range(self.num_sub),
                           desc="comparing with human data: "):
            data_human = self._assemblies.values[sub_id * self.data_len:(sub_id + 1) * self.data_len]
            fix_human = data_human[:, :self.max_fix + 1, :]
            I_fix_human = data_human[:, self.max_fix + 1, :1]
            fix2 = matlab.int32(fix_human.tolist())
            I_fix2 = matlab.int32(I_fix_human.tolist())
            score = self._metric.findScore(fix1, fix2, I_fix1, I_fix2)
            scores.append(score)

        scores = np.asarray(scores)

        self.raw_score = np.mean(scores)
        self.std = np.std(scores) / np.sqrt(scores.shape[0])

        self.model_score = Score([self.raw_score, self.std],
                                 coords={'aggregation': ['center', 'error']},
                                 dims=['aggregation'])

        self._metric.terminate()

        ceiled_score = ceil_score(self.model_score, self.ceiling)
        self._logger.info("## Score calculated...\n")

        return ceiled_score
Example #30
    def __init__(self):
        ceiling = Score(
            [.79, np.nan],  # following private conversation with Kohitij Kar
            coords={'aggregation': ['center', 'error']},
            dims=['aggregation'])
        super(DicarloKar2019OST,
              self).__init__(identifier='dicarlo.Kar2019-ost',
                             version=2,
                             ceiling_func=lambda: ceiling,
                             parent='IT-temporal',
                             bibtex="""@Article{Kar2019,
                                                    author={Kar, Kohitij
                                                    and Kubilius, Jonas
                                                    and Schmidt, Kailyn
                                                    and Issa, Elias B.
                                                    and DiCarlo, James J.},
                                                    title={Evidence that recurrent circuits are critical to the ventral stream's execution of core object recognition behavior},
                                                    journal={Nature Neuroscience},
                                                    year={2019},
                                                    month={Jun},
                                                    day={01},
                                                    volume={22},
                                                    number={6},
                                                    pages={974-983},
                                                    abstract={Non-recurrent deep convolutional neural networks (CNNs) are currently the best at modeling core object recognition, a behavior that is supported by the densely recurrent primate ventral stream, culminating in the inferior temporal (IT) cortex. If recurrence is critical to this behavior, then primates should outperform feedforward-only deep CNNs for images that require additional recurrent processing beyond the feedforward IT response. Here we first used behavioral methods to discover hundreds of these `challenge' images. Second, using large-scale electrophysiology, we observed that behaviorally sufficient object identity solutions emerged {\textasciitilde}30{\thinspace}ms later in the IT cortex for challenge images compared with primate performance-matched `control' images. Third, these behaviorally critical late-phase IT response patterns were poorly predicted by feedforward deep CNN activations. Notably, very-deep CNNs and shallower recurrent CNNs better predicted these late IT responses, suggesting that there is a functional equivalence between additional nonlinear transformations and recurrence. Beyond arguing that recurrent circuits are critical for rapid object identification, our results provide strong constraints for future recurrent model development.},
                                                    issn={1546-1726},
                                                    doi={10.1038/s41593-019-0392-5},
                                                    url={https://doi.org/10.1038/s41593-019-0392-5}
                                                    }""")
        assembly = brainscore.get_assembly('dicarlo.Kar2019')
        # drop duplicate images
        _, index = np.unique(assembly['image_id'], return_index=True)
        assembly = assembly.isel(presentation=index)
        assembly.attrs['stimulus_set'] = assembly.stimulus_set.drop_duplicates(
            'image_id')

        assembly = assembly.sel(decoder='svm')

        self._assembly = assembly
        self._assembly['truth'] = self._assembly['image_label']
        self._assembly.stimulus_set['truth'] = self._assembly.stimulus_set[
            'image_label']

        self._similarity_metric = OSTCorrelation()
        self._visual_degrees = VISUAL_DEGREES
        self._number_of_trials = 44