Example #1
def fROI_correlation():
    assembly = load_voxels()

    stories = list(sorted(set(assembly['story'].values)))
    subjects = list(sorted(set(assembly['subject_UID'].values)))
    split_scores = []
    correlate = pearsonr_correlation(xarray_kwargs=dict(correlation_coord='stimulus_id', neuroid_coord='fROI_area'))
    cross_stories_subjects = list(itertools.product(stories, subjects))
    for story, heldout_subject in tqdm(cross_stories_subjects, desc='cross-{story,subject}'):
        story_assembly = assembly[{'presentation': [coord_story == story for coord_story in assembly['story'].values]}]
        subject_pool = story_assembly[{'neuroid': [subject != heldout_subject
                                                   for subject in story_assembly['subject_UID'].values]}]
        subject_pool = average_subregions(subject_pool)
        heldout = story_assembly[{'neuroid': [subject == heldout_subject
                                              for subject in story_assembly['subject_UID'].values]}]
        heldout = average_subregions(heldout)
        split_score = correlate(subject_pool, heldout)
        split_score = type(split_score)(split_score.values, coords={
            coord: (dims, values) for coord, dims, values in walk_coords(split_score)
            if not coord.startswith('subject_') and coord != 'neuroid_id'}, dims=split_score.dims)

        split_score = split_score.expand_dims('heldout_subject').expand_dims('story')
        split_score['heldout_subject'], split_score['story'] = [heldout_subject], [story]
        split_scores.append(split_score)
    correlation = Score.merge(*split_scores)

    correlation = apply_aggregate(lambda scores: scores.mean('neuroid').mean('story'), correlation)
    center = correlation.mean('heldout_subject')
    error = correlation.std('heldout_subject')
    score = Score([center, error], coords={**{'aggregation': ['center', 'error']},
                                           **{coord: (dims, values) for coord, dims, values in walk_coords(center)}},
                  dims=('aggregation',) + center.dims)
    score.attrs[Score.RAW_VALUES_KEY] = correlation.attrs[Score.RAW_VALUES_KEY]
    return score
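
The tail of this example packages a center/error pair along an 'aggregation' dimension. A minimal sketch of that pattern with plain xarray, assuming Score behaves like an xarray.DataArray subclass (as the tests below suggest); values and subject IDs are made up:

import xarray as xr

# made-up per-heldout-subject correlations
correlation = xr.DataArray([0.4, 0.5, 0.6],
                           coords={'heldout_subject': ['A', 'B', 'C']},
                           dims=['heldout_subject'])
center = correlation.mean('heldout_subject')  # aggregate estimate
error = correlation.std('heldout_subject')    # spread across held-out subjects
score = xr.concat([center, error], dim='aggregation')
score = score.assign_coords(aggregation=['center', 'error'])
print(float(score.sel(aggregation='center')))  # ~0.5
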
 def test_sel(self):
     score = Score([1, 2], coords={'a': [1, 2]}, dims=['a'])
     score.attrs['raw'] = DataAssembly([0, 2, 1, 3],
                                       coords={'a': [1, 1, 2, 2]},
                                       dims=['a'])
     sel_score = score.sel(a=1)
     np.testing.assert_array_equal(sel_score.raw['a'], [1, 1])
 def test_mean_apply_raw(self):
     score = Score([1, 2], coords={'a': [1, 2]}, dims=['a'])
     score.attrs['raw'] = DataAssembly([0, 2, 1, 3],
                                       coords={'a': [1, 1, 2, 2]},
                                       dims=['a'])
     mean_score = score.mean('a', _apply_raw=True)
     assert mean_score.raw == 1.5
 def test_mean(self):
     score = Score([1, 2], coords={'a': [1, 2]}, dims=['a'])
     score.attrs['raw'] = DataAssembly([0, 2, 1, 3],
                                       coords={'a': [1, 1, 2, 2]},
                                       dims=['a'])
     mean_score = score.mean('a')
     np.testing.assert_array_equal(mean_score.raw['a'], [1, 1, 2, 2])
 def __call__(self, assembly, *args, **kwargs):
     result = Score([assembly.values[0]], dims=['dim'])
     raw = result.copy()
     raw['dim_id'] = 'dim', [assembly.values[1]]
     raw['division_coord'] = 'dim', [assembly.values[2]]
     result.attrs['raw'] = raw
     return result
Example #6
 def __call__(self, candidate):
     self._logger.info('Computing activations')
     model_activations = read_words(
         candidate,
         self._target_assembly.attrs['stimulus_set'],
         reset_column='story_id',
         copy_columns=('stimulus_id', 'word_id', 'sentence_id'))
     assert set(model_activations['stimulus_id'].values) == set(
         self._target_assembly['stimulus_id'].values)
     self._logger.info('Scoring model')
     cross_subject_scores = self._cross_subject(
         self._target_assembly,
         apply=lambda cross_assembly: self._apply_within_subject(
             model_activations, cross_assembly))
     # normalize by ceiling
     # Note that we normalize by an overall ceiling, so the scores per subject are not normalized wrt. that subject
     # and should thus not be used by themselves. Only the aggregate makes sense to report
     normalized_subject_scores = consistency(
         cross_subject_scores.sel(aggregation='center'),
         self.ceiling.sel(aggregation='center'))
     score = normalized_subject_scores.median('subject_id')
     std = normalized_subject_scores.std('subject_id')
     std['aggregation'] = 'error'
     # the MultiIndex tends to mess things up, so we get rid of it here
     score, std = xr.DataArray(score).expand_dims(
         'aggregation'), xr.DataArray(std).expand_dims('aggregation')
     score = Score(Score.merge(score, std))
     score.attrs['raw'] = cross_subject_scores
     score.attrs['ceiling'] = self.ceiling
     return score
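
consistency is not shown in this snippet; in this pattern it is presumably a plain division of the per-subject scores by the ceiling's center estimate. A minimal sketch under that assumption (made-up values, not necessarily the repository's implementation):

import xarray as xr

def consistency(score, ceiling):
    # assumed definition: normalize scores by the overall ceiling estimate
    return score / ceiling

cross_subject_scores = xr.DataArray([0.30, 0.36],
                                    coords={'subject_id': ['s1', 's2']},
                                    dims=['subject_id'])
ceiling_center = 0.6  # stand-in for self.ceiling.sel(aggregation='center')
normalized = consistency(cross_subject_scores, ceiling_center)
print(float(normalized.median('subject_id')))  # ~0.55
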
Example #7
    def __call__(self, model: TaskModel):
        model.mode = TaskModel.Modes.tokens_to_features
        set_seed(self.seed)
        device = 'cuda' if torch.cuda.is_available() else 'cpu'
        logger.debug(
            f"Using block size {self.block_size} for {model.identifier}")

        # Data
        vocab_size = min(model.vocab_size, 250000)
        train_tokens = TextDataset(model_identifier=model.identifier,
                                   model=model,
                                   block_size=self.block_size,
                                   vocab_size=vocab_size,
                                   file_path=self.train_data_file)
        val_tokens = TextDataset(model_identifier=model.identifier,
                                 model=model,
                                 block_size=self.block_size,
                                 vocab_size=vocab_size,
                                 file_path=self.val_data_file)
        test_tokens = TextDataset(model_identifier=model.identifier,
                                  model=model,
                                  block_size=self.block_size,
                                  vocab_size=vocab_size,
                                  file_path=self.eval_data_file)

        # Decoder
        logger.info(f"Vocab size: {vocab_size}")
        features_sample, _ = train_tokens[0]
        lm_head = LMHeadModel(features_size=features_sample.shape[-1],
                              vocab_size=vocab_size,
                              embedding_weights=model.get_embedding_weights()
                              if self.tied else None)
        lm_head = lm_head.to(device)

        # Train
        train(model=lm_head,
              train_dataset=train_tokens,
              val_dataset=val_tokens,
              device=device,
              seed=self.seed,
              **self.kwargs)

        # Evaluation
        test_result = evaluate(model=lm_head,
                               eval_dataset=test_tokens,
                               device=device)
        score = Score([test_result[key] for key in ['perplexity', 'loss']],
                      coords={'measure': ['test_perplexity', 'test_loss']},
                      dims=['measure'])
        score.attrs['datasets'] = {
            'train': self.train_data_file,
            'val': self.val_data_file,
            'test': self.eval_data_file
        }
        score.attrs['benchmark_identifier'] = self.identifier
        score.attrs['model_identifier'] = model.identifier
        return score
Example #8
    def extrapolate_neuroid(self, ceilings):
        # figure out how many extrapolation x points we have. E.g. for Pereira, not all combinations are possible
        subject_subsamples = list(sorted(set(ceilings['num_subjects'].values)))
        rng = RandomState(0)
        bootstrap_params = []
        for bootstrap in range(self.num_bootstraps):
            bootstrapped_scores = []
            for num_subjects in subject_subsamples:
                num_scores = ceilings.sel(num_subjects=num_subjects)
                # the sub_subjects dimension creates nans, get rid of those
                num_scores = num_scores.dropna(f'sub_{self.subject_column}')
                assert set(num_scores.dims) == {f'sub_{self.subject_column}', 'split'} or \
                       set(num_scores.dims) == {f'sub_{self.subject_column}'}
                # choose from subject subsets and the splits therein, with replacement for variance
                choices = num_scores.values.flatten()
                bootstrapped_score = rng.choice(choices,
                                                size=len(choices),
                                                replace=True)
                bootstrapped_scores.append(np.mean(bootstrapped_score))

            try:
                params = self.fit(subject_subsamples, bootstrapped_scores)
            except RuntimeError:  # optimal parameters not found
                params = [np.nan, np.nan]
            params = DataAssembly([params],
                                  coords={
                                      'bootstrap': [bootstrap],
                                      'param': ['v0', 'tau0']
                                  },
                                  dims=['bootstrap', 'param'])
            bootstrap_params.append(params)
        bootstrap_params = merge_data_arrays(bootstrap_params)
        # find endpoint and error
        asymptote_threshold = .0005
        interpolation_xs = np.arange(1000)
        ys = np.array([
            v(interpolation_xs, *params) for params in bootstrap_params.values
            if not np.isnan(params).any()
        ])
        median_ys = np.median(ys, axis=0)
        diffs = np.diff(median_ys)
        end_x = np.where(diffs < asymptote_threshold)[0].min(
        )  # first x where increase smaller than threshold
        # put together
        center = np.median(np.array(bootstrap_params)[:, 0])
        error = ci_error(ys[:, end_x], center=center)
        score = Score(
            [center] + list(error),
            coords={'aggregation': ['center', 'error_low', 'error_high']},
            dims=['aggregation'])
        score.attrs['raw'] = ceilings
        score.attrs['bootstrapped_params'] = bootstrap_params
        score.attrs['endpoint_x'] = DataAssembly(end_x)
        return score
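
self.fit above presumably fits a saturating extrapolation curve with scipy.optimize.curve_fit (the "optimal parameters not found" comment matches the RuntimeError that curve_fit raises when it cannot converge), and v is the corresponding model function. A hedged sketch assuming the common v0 * (1 - exp(-x / tau0)) form:

import numpy as np
from scipy.optimize import curve_fit

def v(x, v0, tau0):
    # assumed saturating form of the extrapolation function referenced above
    return v0 * (1 - np.exp(-np.asarray(x, dtype=float) / tau0))

subject_subsamples = [1, 2, 3, 4, 5]
bootstrapped_scores = [0.20, 0.32, 0.38, 0.41, 0.43]  # made-up ceiling estimates
params, _ = curve_fit(v, subject_subsamples, bootstrapped_scores, p0=(.5, 1))  # RuntimeError if no convergence
v0_hat, tau0_hat = params
print(v0_hat)  # asymptotic ("extrapolated") ceiling estimate
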
Example #9
def aggregate_neuroid_scores(neuroid_scores, subject_column):
    subject_scores = neuroid_scores.groupby(subject_column).median()
    center = subject_scores.median(subject_column)
    subject_values = np.nan_to_num(subject_scores.values, nan=0)  # mad cannot deal with all-nan in one axis, treat as 0
    subject_axis = subject_scores.dims.index(subject_scores[subject_column].dims[0])
    error = median_absolute_deviation(subject_values, axis=subject_axis)
    score = Score([center, error], coords={'aggregation': ['center', 'error']}, dims=['aggregation'])
    score.attrs['raw'] = neuroid_scores
    score.attrs['description'] = "score aggregated by taking median of neuroids per subject, " \
                                 "then median of subject scores"
    return score
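
A minimal usage sketch with synthetic data, using plain xarray in place of Score/DataAssembly and scipy's median_abs_deviation (presumably the successor of the median_absolute_deviation used above; note the default scale differs, 1.0 vs 1.4826); neuroid and subject labels are made up:

import xarray as xr
from scipy.stats import median_abs_deviation  # scipy >= 1.5

# six neuroids belonging to three subjects, with made-up scores
neuroid_scores = xr.DataArray(
    [0.2, 0.4, 0.5, 0.3, 0.6, 0.1],
    coords={'neuroid_id': ('neuroid', list(range(6))),
            'subject': ('neuroid', ['s1', 's1', 's2', 's2', 's3', 's3'])},
    dims=['neuroid'])
subject_scores = neuroid_scores.groupby('subject').median()  # median over each subject's neuroids
center = subject_scores.median('subject')                    # aggregate across subjects
error = median_abs_deviation(subject_scores.values)          # spread across subjects
print(float(center), float(error))  # ~0.35 ~0.05
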
 def test_squeeze(self):
     score = Score([[1, 2]],
                   coords={
                       's': [0],
                       'a': [1, 2]
                   },
                   dims=['s', 'a'])
     score.attrs['raw'] = DataAssembly([[0, 2, 1, 3]],
                                       coords={
                                           's': [0],
                                           'a': [1, 1, 2, 2]
                                       },
                                       dims=['s', 'a'])
     sel_score = score.squeeze('s')
     np.testing.assert_array_equal(sel_score.raw.dims, ['a'])
Example #11
    def _call(
            self,
            model_identifier,
            benchmark_identifier,  # storage fields
            model,
            benchmark,
            layers,
            prerun=False):
        if prerun:
            # pre-run activations together to avoid running every layer separately
            model(layers=layers, stimuli=benchmark._assembly.stimulus_set)

        layer_scores = []
        for layer in tqdm(layers, desc="layers"):
            layer_model = LayerMappedModel(
                identifier=f"{model_identifier}-{layer}",
                # per-layer identifier to avoid overlap
                activations_model=model,
                region_layer_map={benchmark.region: layer})
            layer_model = TemporalIgnore(layer_model)
            score = benchmark(layer_model)
            score = score.expand_dims('layer')
            score['layer'] = [layer]
            layer_scores.append(score)
        layer_scores = Score.merge(*layer_scores)
        layer_scores = layer_scores.sel(
            layer=layers)  # preserve layer ordering
        return layer_scores
Example #12
 def _call(
         self,
         model_identifier,
         benchmark_identifier,  # storage fields
         model,
         benchmark_builder,
         layer_and_params):
     all_scores = []
     all_layer_param_str = []
     for layer, param_str in tqdm(layer_and_params, desc="layers"):
         bench_args, bench_kwargs = json.loads(param_str)
         benchmark = benchmark_builder(*bench_args, **bench_kwargs)
         layer_model = self.build_layer_model(
             identifier=f"{model_identifier}-{layer}-layer-param",
             model=model,
             benchmark=benchmark,
             layer=layer,
         )
         score = benchmark(layer_model)
         score = score.expand_dims('layer_param')
         layer_param_str = '%s-%s' % (layer, param_str)
         score['layer_param'] = [layer_param_str]
         all_scores.append(score)
         all_layer_param_str.append(layer_param_str)
     all_scores = Score.merge(*all_scores)
     all_scores = all_scores.sel(layer_param=all_layer_param_str)
     return all_scores
Example #13
def score(benchmark, model, layers=None, model_impl=None, subsample=None):
    model_impl = model_impl or model_pool[model]
    if subsample:
        SubsamplingHook.hook(model, subsample)
    layers = layers or model_layers[model]

    _logger.info('Loading benchmark')
    benchmark_impl = benchmark_pool[benchmark]

    _logger.info('Running')
    # shortcut for performance benchmarks
    if any(benchmark.startswith(performance_prefix) for performance_prefix in ['wikitext', 'glue']):
        return benchmark_impl(model_impl)

    # only last layer for behavioral benchmarks
    if benchmark.startswith('Futrell2018'):
        layers = layers[-1:]

    layer_scores = []
    for i, layer in enumerate(tqdm(layers, desc='layers')):
        if any(benchmark.startswith(performance_prefix) for performance_prefix in ['wikitext', 'glue']):
            candidate = StripLayersAfter(model_impl, layer=layer)
        else:  # prerun everything for 1st layer
            candidate = FixedLayer(model_impl, layer, prerun=layers if i == 0 else None)
        layer_score = benchmark_impl(candidate)
        layer_score = layer_score.expand_dims('layer')
        layer_score['layer'] = [layer]
        layer_scores.append(layer_score)
    layer_scores = Score.merge(*layer_scores)
    layer_scores = layer_scores.sel(layer=layers)  # preserve layer ordering
    layer_scores.attrs['model'] = model
    layer_scores.attrs['benchmark'] = benchmark
    return layer_scores
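
The expand_dims / merge / sel sequence at the end of this example is a packaging pattern that recurs throughout these snippets. A minimal sketch of it with plain xarray, where xr.concat stands in for Score.merge (layer names are made up):

import xarray as xr

layers = ['encoder.h.0', 'encoder.h.1']  # made-up layer names
layer_scores = []
for layer, value in zip(layers, [0.3, 0.5]):
    layer_score = xr.DataArray(value)
    layer_score = layer_score.expand_dims('layer')
    layer_score['layer'] = [layer]
    layer_scores.append(layer_score)
layer_scores = xr.concat(layer_scores, dim='layer')  # stand-in for Score.merge(*layer_scores)
layer_scores = layer_scores.sel(layer=layers)        # preserve layer ordering
print(layer_scores.values)  # [0.3 0.5]
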
Example #14
 def _repeat(self, func):
     random_state = self._initialize_random_state()
     repetitions = list(range(self._repetitions))
     scores = [func(random_state=random_state) for repetition in repetitions]
     score = Score(scores, coords={'split': repetitions}, dims=['split'])
     self._save_matrix()
     return apply_aggregate(self.aggregate, score)
Example #15
 def _call(
         self,
         model_identifier,
         benchmark_identifier,
         visual_degrees,  # storage fields
         model,
         benchmark,
         layers,
         prerun=False):
     layer_scores = []
     for i, layer in enumerate(tqdm(layers, desc="layers")):
         layer_model = LayerMappedModel(
             identifier=f"{model_identifier}-{layer}",
             visual_degrees=visual_degrees,
             # per-layer identifier to avoid overlap
             activations_model=model,
             region_layer_map={benchmark.region: layer})
         layer_model = TemporalIgnore(layer_model)
         if i == 0 and prerun:  # pre-run activations together to avoid running every layer separately
             # we can only pre-run stimuli in response to the benchmark, since we might otherwise be missing
             # visual_degrees resizing.
             layer_model = PreRunLayers(model=model,
                                        layers=layers,
                                        forward=layer_model)
         score = benchmark(layer_model)
         score = score.expand_dims('layer')
         score['layer'] = [layer]
         layer_scores.append(score)
     layer_scores = Score.merge(*layer_scores)
     layer_scores = layer_scores.sel(
         layer=layers)  # preserve layer ordering
     return layer_scores
Example #16
 def _apply_cross(self, source_assembly, cross_assembly):
     # some subjects have only done one experiment which leads to nans
     cross_assembly = cross_assembly.dropna('neuroid')
     if len(cross_assembly['neuroid']) == 0:
         return Score([np.nan, np.nan], coords={'aggregation': ['center', 'error']}, dims=['aggregation'])
     return super(_PereiraSubjectWise, self)._apply_cross(
         source_assembly=source_assembly, cross_assembly=cross_assembly)
Example #17
 def __call__(self, candidate: BrainModel):
     candidate.start_recording('IT', time_bins=self._time_bins)
     stimulus_set = place_on_screen(
         self._assembly.stimulus_set,
         target_visual_degrees=candidate.visual_degrees(),
         source_visual_degrees=self._visual_degrees)
     # Temporal recordings from large candidates take up a lot of memory and compute time.
     # In order to quickly reject recordings that are static over time,
     # we show one image and check whether the recordings vary over time at all.
     # If they don't, we can quickly score the candidate with a failure state,
     # since it will not be able to predict temporal differences with the OST metric.
     check_stimulus_set = stimulus_set[:1]
     check_stimulus_set.identifier = None  # unset identifier to avoid storing (interferes with actual stimulus_set)
     check_recordings = candidate.look_at(
         check_stimulus_set, number_of_trials=self._number_of_trials)
     if not temporally_varying(check_recordings):
         score = Score([np.nan, np.nan],
                       coords={'aggregation': ['center', 'error']},
                       dims=['aggregation'])
     else:
         recordings = candidate.look_at(
             stimulus_set, number_of_trials=self._number_of_trials)
         score = self._similarity_metric(recordings, self._assembly)
     score = ceil_score(score, self.ceiling)
     return score
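
temporally_varying is not shown in this snippet; a plausible sketch of such a check (an assumption, not necessarily the repository's implementation) treats recordings as temporally varying if any response changes across time bins:

def temporally_varying(recordings):
    # assumed check on an xarray-like assembly with a 'time_bin' dimension:
    # is there any variation across time bins for any presentation/neuroid?
    variation = recordings.std('time_bin')
    return bool((variation > 0).any())
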
Example #18
        def collect(self, identifier, assembly, metric):
            """ Instead of iterating over subject combinations and then afterwards over holdout subjects,
            we here iterate over holdout subjects and then over electrode sub-combinations of the remaining pool. """
            subjects = set(assembly[self.subject_column].values)
            scores = []
            for holdout_subject in tqdm(subjects, desc='subjects'):
                subject_pool = subjects - {holdout_subject}
                subject_pool_assembly = assembly[{'neuroid': [subject in subject_pool
                                                              for subject in assembly[self.subject_column].values]}]
                holdout_subject_assembly = assembly[{'neuroid': [subject == holdout_subject
                                                                 for subject in assembly[self.subject_column].values]}]

                electrodes = subject_pool_assembly['neuroid_id'].values
                electrodes_range = np.arange(5, len(electrodes), 5)
                for num_electrodes in tqdm(electrodes_range, desc='num electrodes'):
                    electrodes_combinations = self._choose_electrodes(electrodes, num_electrodes,
                                                                      num_choices=self._num_samples)
                    for electrodes_split, electrodes_selection in enumerate(electrodes_combinations):
                        electrodes_assembly = subject_pool_assembly[{'neuroid': [
                            neuroid_id in electrodes_selection
                            for neuroid_id in subject_pool_assembly['neuroid_id'].values]}]
                        score = metric(electrodes_assembly, holdout_subject_assembly)
                        # store scores
                        score = score.expand_dims(f"sub_{self.subject_column}")
                        score.__setitem__(f"sub_{self.subject_column}", [holdout_subject])
                        score = score.expand_dims('num_electrodes').expand_dims('electrodes_split')
                        score['num_electrodes'] = [num_electrodes]
                        score['electrodes_split'] = [electrodes_split]
                        scores.append(score)

            scores = Score.merge(*scores)
            ceilings = scores.raw
            ceilings = ceilings.rename({'split': 'subsplit'}).stack(split=['electrodes_split', 'subsplit'])
            ceilings.attrs['raw'] = scores
            return ceilings
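
self._choose_electrodes is not shown in this snippet; a hypothetical sketch of such a subsampler, drawing seeded random electrode subsets without replacement:

from numpy.random import RandomState

def choose_electrodes(electrodes, num_electrodes, num_choices, seed=0):
    # hypothetical helper: draw num_choices random subsets of num_electrodes electrodes each
    rng = RandomState(seed)
    return [rng.choice(electrodes, size=num_electrodes, replace=False)
            for _ in range(num_choices)]
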
    def pipe(self, assembly):
        """
        :param brainscore.assemblies.NeuroidAssembly assembly:
        :return: brainscore.assemblies.DataAssembly
        """
        dividers = self.dividers(assembly, dividing_coords=self._dividers)
        scores = []
        progress = tqdm(enumerate_done(dividers),
                        total=len(dividers),
                        desc='cartesian product')
        for i, divider, done in progress:
            progress.set_description(str(divider))
            divided_assembly = assembly.multisel(**divider)
            # squeeze dimensions if necessary
            for divider_coord in divider:
                dims = assembly[divider_coord].dims
                assert len(dims) == 1
                if dims[0] in divided_assembly.dims and len(
                        divided_assembly[dims[0]]) == 1:
                    divided_assembly = divided_assembly.squeeze(dims[0])
            result = yield from self._get_result(divided_assembly, done=done)

            for coord_name, coord_value in divider.items():
                result = result.expand_dims(coord_name)
                result[coord_name] = [coord_value]
            scores.append(result)
        scores = Score.merge(*scores)
        yield scores
Example #20
    def __init__(self):
        ceiling = Score([1, np.nan],
                        coords={'aggregation': ['center', 'error']},
                        dims=['aggregation'])
        assembly_repetition = get_assembly()
        assert len(np.unique(assembly_repetition['region'])) == 1
        assert hasattr(assembly_repetition, 'repetition')
        self.region = 'IT'
        self.assembly = average_repetition(assembly_repetition)
        self._assembly = self.assembly
        self.timebins = timebins_from_assembly(self.assembly)

        self._similarity_metric = CrossRegressedCorrelation(
            regression=pls_regression(),
            correlation=pearsonr_correlation(),
            crossvalidation_kwargs=dict(
                stratification_coord=Split.Defaults.stratification_coord
                if hasattr(self.assembly, Split.Defaults.stratification_coord
                           ) else None))
        identifier = f'{assembly_repetition.name}-layer_selection'
        ceiler = InternalConsistency()
        super(_MockBenchmark,
              self).__init__(identifier=identifier,
                             ceiling_func=lambda: ceiler(assembly_repetition),
                             version='1.0')
Example #21
 def _call(
         self,
         model_identifier,
         layer,
         benchmark_identifier,  # storage fields
         model,
         benchmark_builder,
         params):
     param_scores = []
     for param_str in tqdm(params, desc="params"):
         bench_args, bench_kwargs = json.loads(param_str)
         benchmark = benchmark_builder(*bench_args, **bench_kwargs)
         layer_model = self.build_layer_model(
             identifier=f"{model_identifier}-{layer}",
             model=model,
             benchmark=benchmark,
             layer=layer,
         )
         score = benchmark(layer_model)
         score = score.expand_dims('param')
         score['param'] = [param_str]
         param_scores.append(score)
     param_scores = Score.merge(*param_scores)
     param_scores = param_scores.sel(
          param=params)  # preserve param ordering
     return param_scores
 def __call__(self, prediction, target):
     # align
     prediction = prediction.sortby(
         [self._correlation_coord, self._neuroid_coord])
     target = target.sortby([self._correlation_coord, self._neuroid_coord])
     assert np.array(prediction[self._correlation_coord].values == target[
         self._correlation_coord].values).all()
     assert np.array(prediction[self._neuroid_coord].values == target[
         self._neuroid_coord].values).all()
     # compute correlation per neuroid
     neuroid_dims = target[self._neuroid_coord].dims
     assert len(neuroid_dims) == 1
     correlations = []
     for i, coord_value in enumerate(target[self._neuroid_coord].values):
         target_neuroids = target.isel(**{
             neuroid_dims[0]: i
         })  # `isel` is about 10x faster than `sel`
         prediction_neuroids = prediction.isel(**{neuroid_dims[0]: i})
         r, p = self._correlation(target_neuroids, prediction_neuroids)
         correlations.append(r)
     # package
     result = Score(correlations,
                    coords={
                        coord: (dims, values)
                        for coord, dims, values in walk_coords(target)
                        if dims == neuroid_dims
                    },
                    dims=neuroid_dims)
     return result
Example #23
 def __init__(self, noise_type, parent_category):
     self._noise_type = noise_type
     ceiling = Score([1, np.nan], coords={'aggregation': ['center', 'error']}, dims=['aggregation'])
     super(Imagenet_C_Group, self).__init__(identifier=f'dietterich.Hendrycks2019-{noise_type}-top1', version=1,
                                            ceiling_func=lambda: ceiling,
                                            parent=f'dietterich.Hendrycks2019-{parent_category}-top1',
                                            bibtex=BIBTEX)
Example #24
    def __init__(self):
        ceiling = Score(
            [.79, np.nan],  # following private conversation with Kohitij Kar
            coords={'aggregation': ['center', 'error']},
            dims=['aggregation'])
        super(DicarloKar2019OST, self).__init__(
            identifier='dicarlo.Kar2019-ost',
            version=2,
            ceiling_func=lambda: ceiling,
            parent='IT-temporal',
            paper_link='https://www.nature.com/articles/s41593-019-0392-5')
        assembly = brainscore.get_assembly('dicarlo.Kar2019')
        # drop duplicate images
        _, index = np.unique(assembly['image_id'], return_index=True)
        assembly = assembly.isel(presentation=index)
        assembly.attrs['stimulus_set'] = assembly.stimulus_set.drop_duplicates(
            'image_id')

        assembly = assembly.sel(decoder='svm')

        self._assembly = assembly
        self._assembly['truth'] = self._assembly['image_label']
        self._assembly.stimulus_set['truth'] = self._assembly.stimulus_set[
            'image_label']

        self._similarity_metric = OSTCorrelation()
        self._visual_degrees = VISUAL_DEGREES
        self._number_of_trials = 44
Example #25
 def collect(self, identifier, assembly, metric):
     subjects = set(assembly[self.subject_column].values)
     subject_subsamples = self.build_subject_subsamples(subjects)
     scores = []
     for num_subjects in tqdm(subject_subsamples, desc='num subjects'):
         selection_combinations = self.iterate_subsets(
             assembly, num_subjects=num_subjects)
         for selections, sub_assembly in tqdm(selection_combinations,
                                              desc='selections'):
             try:
                 score = self.holdout_ceiling(assembly=sub_assembly,
                                              metric=metric)
                 score = score.expand_dims('num_subjects')
                 score['num_subjects'] = [num_subjects]
                 for key, selection in selections.items():
                     expand_dim = f'sub_{key}'
                     score = score.expand_dims(expand_dim)
                     score[expand_dim] = [str(selection)]
                 scores.append(score.raw)
             except KeyError as e:  # nothing to merge
                 if str(e) == "'z'":
                     self._logger.debug(f"Ignoring merge error {e}")
                     continue
                 else:
                     raise e
     scores = Score.merge(*scores)
     scores = self.post_process(scores)
     return scores
Example #26
def decode_voxels():
    assembly = load_voxels()
    cross_validation = CrossValidationSingle(splits=1,
                                             split_coord='stimulus_id',
                                             stratification_coord='story')

    subjects = list(sorted(set(assembly['subject_UID'].values)))
    scores = []
    for subject in subjects:
        subject_index = [
            subject == coord_subject
            for coord_subject in assembly['subject_UID'].values
        ]
        subject_assembly = assembly[{'neuroid': subject_index}]
        subject_score = cross_validation(subject_assembly, apply=fit_decode)
        subject_score = subject_score.sel(
            aggregation='center')  # since we're only doing one split
        print(
            f"subject {subject}: "
            f"{subject_score.sel(accuracy_aggregation='center', train_test='train').values:.2f} train, "
            f"{subject_score.sel(accuracy_aggregation='center', train_test='test').values:.2f} test"
        )
        subject_score = subject_score.expand_dims('subject')
        subject_score['subject'] = [subject]
        scores.append(subject_score)
    scores = Score.merge(*scores)
    return scores
Example #27
 def __init__(self):
     stimulus_set = pd.read_csv(
         os.path.join(os.path.dirname(__file__), 'imagenet2012.csv'))
     stimulus_set = StimulusSet(stimulus_set)
     stimulus_set.image_paths = {
         row.image_id: row.filepath
         for row in stimulus_set.itertuples()
     }
     self._stimulus_set = stimulus_set
     self._similarity_metric = Accuracy()
     ceiling = Score([1, np.nan],
                     coords={'aggregation': ['center', 'error']},
                     dims=['aggregation'])
     super(Imagenet2012, self).__init__(identifier='fei-fei.Deng2009-top1',
                                        version=1,
                                        ceiling_func=lambda: ceiling,
                                        parent='ImageNet',
                                        bibtex="""@INPROCEEDINGS{5206848,  
                                             author={J. {Deng} and W. {Dong} and R. {Socher} and L. {Li} and  {Kai Li} and  {Li Fei-Fei}},  
                                             booktitle={2009 IEEE Conference on Computer Vision and Pattern Recognition},   
                                             title={ImageNet: A large-scale hierarchical image database},   
                                             year={2009},  
                                             volume={},  
                                             number={},  
                                             pages={248-255},
                                             url = {https://ieeexplore.ieee.org/document/5206848}
                                         }""")
Example #28
 def run_evaluation(return_score=False):
     scores = []
     # Loop to handle MNLI double evaluation (matched, mis-matched)
     for eval_task in eval_task_names:
         examples, label_list, output_mode = get_examples(
             data_dir=data_dir, task=eval_task, evaluate=True)
         eval_dataset = model.glue_dataset(
             task=eval_task,
             examples=examples,
             label_list=label_list,
             output_mode=output_mode,
             max_seq_length=max_seq_length)
         result = evaluate(features_model=model,
                           decoder_head=decoder_head,
                           eval_dataset=eval_dataset,
                           task_name=eval_task,
                           output_mode=output_mode,
                           device=device)
         if not return_score:
             return result  # we're ignoring mnli-mm here, but this return is just for progress logging anyway
         score = Score([[value for key, value in result.items()]],
                       coords={
                           'eval_task': [eval_task],
                           'measure': list(result.keys())
                       },
                       dims=['eval_task', 'measure'])
         score.attrs['data_dir'] = data_dir
         score.attrs['benchmark_identifier'] = f"glue-{self.task_name}"
         score.attrs['eval_task'] = eval_task
         score.attrs['model_identifier'] = model.identifier
         scores.append(score)
     scores = Score.merge(*scores)
     return scores
Example #29
 def __call__(self, assembly):
     ceilings = []
     for time_bin in tqdm(assembly['time_bin'].values, desc='time-ceiling'):
         ceiling = self.ceiling(assembly.sel(time_bin=time_bin))
         ceiling = ceiling.expand_dims('time_bin')
         ceiling['time_bin'] = [str(time_bin)]
         ceilings.append(ceiling)
     ceiling = Score.merge(*ceilings)
     return ceiling
Example #30
 def correlate(self, predicted_osts, target_osts):
     non_nan = np.logical_and(~np.isnan(predicted_osts),
                              ~np.isnan(target_osts))
     predicted_osts, target_osts = predicted_osts[non_nan], target_osts[
         non_nan]
     # use Spearman over Pearson since it tests whether the rank orders are the same,
     # which allows for nonlinear correlates whereas Pearson assumes linearity.
     correlation, p = spearmanr(predicted_osts, target_osts)
     return Score(correlation)
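
A quick illustration of the Spearman-over-Pearson point in the comment above: a monotone but nonlinear relation yields a perfect rank correlation, while the linear (Pearson) correlation stays below 1.

import numpy as np
from scipy.stats import pearsonr, spearmanr

x = np.arange(1, 11, dtype=float)
y = x ** 3  # nonlinear but rank-preserving relation
rho, _ = spearmanr(x, y)
r, _ = pearsonr(x, y)
print(rho)  # 1.0
print(r)    # < 1.0, since Pearson assumes linearity
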