def test_larger_error(self):
    """Expect `subset` to raise when the target has presentations the source lacks."""

    def stacked_assembly(values):
        # Wrap a 2-D array with running image/neuroid ids and stack each id
        # into the `presentation` / `neuroid` dimension respectively.
        num_images, num_neuroids = values.shape
        assembly = NeuroidAssembly(
            values,
            coords={
                'image_id': list(range(num_images)),
                'neuroid_id': list(range(num_neuroids)),
            },
            dims=['image_id', 'neuroid_id'])
        return assembly.stack(presentation=('image_id',),
                              neuroid=('neuroid_id',))

    # source covers image ids 0..49, target covers image ids 0..99
    source_assembly = stacked_assembly(np.random.rand(50, 2))
    target_assembly = stacked_assembly(np.random.rand(100, 3))

    with pytest.raises(Exception):
        subset(source_assembly, target_assembly,
               subset_dims=('presentation',))
def test_smaller_last(self):
    """Subset the source against a target selected from its last 50 image ids."""
    values = np.random.rand(100, 3)
    num_images, num_neuroids = values.shape
    source_assembly = NeuroidAssembly(
        values,
        coords={
            'image_id': list(range(num_images)),
            'neuroid_id': list(range(num_neuroids)),
        },
        dims=['image_id', 'neuroid_id'],
    ).stack(presentation=('image_id',), neuroid=('neuroid_id',))

    # target: image ids 50..99 and neuroid ids 1..2, addressed as 1-tuples
    presentation_keys = [(50 + offset,) for offset in range(50)]
    neuroid_keys = [(1 + offset,) for offset in range(2)]
    target_assembly = source_assembly.sel(presentation=presentation_keys,
                                          neuroid=neuroid_keys)

    subset_assembly = subset(source_assembly, target_assembly,
                             subset_dims=('presentation',))

    np.testing.assert_array_equal(subset_assembly.coords.keys(),
                                  target_assembly.coords.keys())
    for coord_name in target_assembly.coords:
        coords_match = subset_assembly[coord_name] == target_assembly[coord_name]
        assert all(coords_match)
    assert (subset_assembly == target_assembly).all()
def test_equal_shifted(self):
    """Subset a source holding the target's rows rotated by one position."""
    target_values = np.random.rand(100, 3)
    num_images, num_neuroids = target_values.shape
    target_assembly = NeuroidAssembly(
        target_values,
        coords={
            'image_id': list(range(num_images)),
            'neuroid_id': list(range(num_neuroids)),
        },
        dims=['image_id', 'neuroid_id'])
    target_assembly = target_assembly.stack(presentation=('image_id',),
                                            neuroid=('neuroid_id',))

    # move the first row to the end ...
    tail_rows = target_assembly.values[1:]
    head_row = target_assembly.values[:1]
    shifted_values = np.concatenate((tail_rows, head_row))
    # ... and label the rows to match: 1, 2, ..., n - 1, 0
    shifted_ids = np.array(list(range(shifted_values.shape[0]))) + 1
    shifted_ids[-1] = 0

    source_assembly = NeuroidAssembly(
        shifted_values,
        coords={
            'image_id': shifted_ids,
            'neuroid_id': list(range(shifted_values.shape[1])),
        },
        dims=['image_id', 'neuroid_id'])
    source_assembly = source_assembly.stack(presentation=('image_id',),
                                            neuroid=('neuroid_id',))

    subset_assembly = subset(source_assembly, target_assembly,
                             subset_dims=('presentation',))

    np.testing.assert_array_equal(subset_assembly.coords.keys(),
                                  target_assembly.coords.keys())
    assert subset_assembly.shape == target_assembly.shape
def test_repeated_dim_and_adjacent(self):
    """Subset a source whose two axes are both `presentation`, keeping a scalar coord."""
    source_values = np.random.rand(5, 5)
    num_presentations = source_values.shape[0]
    source_coords = {
        'image_id': ('presentation', list(range(num_presentations))),
        'image_meta': ('presentation', np.zeros(num_presentations)),
        # scalar coordinate that is not attached to any dimension
        'adjacent': 12,
    }
    source_assembly = NeuroidAssembly(source_values,
                                      coords=source_coords,
                                      dims=['presentation', 'presentation'])

    target_assembly = NeuroidAssembly(np.zeros(3),
                                      coords={'image_id': [0, 2, 3]},
                                      dims=['image_id'])
    target_assembly = target_assembly.stack(presentation=('image_id',))

    subset_assembly = subset(source_assembly, target_assembly,
                             subset_dims=('presentation',), repeat=True)

    np.testing.assert_array_equal(subset_assembly.shape, (3, 3))
    actual_ids = set(subset_assembly['image_id'].values)
    expected_ids = set(target_assembly['image_id'].values)
    assert actual_ids == expected_ids
    assert subset_assembly['adjacent'] == 12
def test_repeated_target(self):
    """Subset with `repeat=True` against a target listing every image id twice."""
    source_values = np.random.rand(5, 3)
    source_assembly = NeuroidAssembly(
        source_values,
        coords={
            'image_id': list(range(source_values.shape[0])),
            'neuroid_id': list(range(source_values.shape[1])),
        },
        dims=['image_id', 'neuroid_id'])
    source_assembly = source_assembly.stack(presentation=('image_id',),
                                            neuroid=('neuroid_id',))

    # target: each source row (and its image id) duplicated back-to-back
    repeated_values = np.repeat(source_assembly, 2, axis=0)
    repeated_ids = np.repeat(list(range(source_assembly.shape[0])), 2, axis=0)
    neuroid_ids = list(range(source_assembly.shape[1]))
    target_assembly = NeuroidAssembly(
        repeated_values,
        coords={'image_id': repeated_ids, 'neuroid_id': neuroid_ids},
        dims=['image_id', 'neuroid_id'])
    target_assembly = target_assembly.stack(presentation=('image_id',),
                                            neuroid=('neuroid_id',))

    subset_assembly = subset(source_assembly, target_assembly,
                             subset_dims=('presentation',), repeat=True)

    np.testing.assert_array_equal(subset_assembly.coords.keys(),
                                  target_assembly.coords.keys())
    for coord_name in target_assembly.coords:
        coords_match = subset_assembly[coord_name] == target_assembly[coord_name]
        assert all(coords_match)
    np.testing.assert_array_equal(subset_assembly, target_assembly)
    assert (subset_assembly == target_assembly).all()
def align(self, source_assembly, target_assembly):
    """
    Reorder the source's dimensions and subset it against the target.

    The source is transposed so that `self._order_dimensions` come first,
    followed by its remaining dimensions in the order they appear on the
    source. It is then passed to `subset` along `self._alignment_dim`.

    :param source_assembly: assembly to transpose and subset
    :param target_assembly: assembly handed to `subset` as the target
    :return: the result of `subset` on the transposed source
    """
    leading_dims = list(self._order_dimensions)
    # A plain set difference iterates in arbitrary (hash-dependent) order,
    # which made the transposed layout vary between runs. `dict.fromkeys`
    # de-duplicates like a set but keeps first-seen order.
    trailing_dims = [dim for dim in dict.fromkeys(source_assembly.dims)
                     if dim not in leading_dims]
    dimensions = leading_dims + trailing_dims
    source_assembly = source_assembly.transpose(*dimensions)
    return subset(source_assembly, target_assembly,
                  subset_dims=[self._alignment_dim], repeat=self._repeat)
def pipe(self, source_assembly, target_assembly):
    """
    Cross-validate over a source/target pair of assemblies.

    Generator: for every split it delegates to `self._get_result` via
    `yield from`, tags the returned score with the split index, and finally
    yields all split scores merged with `Score.merge`.

    :param source_assembly: assembly subset alongside the target for each split
    :param target_assembly: assembly the splits are built from
    """
    # check only for equal values, alignment is given by metadata
    assert sorted(source_assembly[self._split_coord].values) == \
        sorted(target_assembly[self._split_coord].values)
    if self._split.do_stratify:
        assert hasattr(source_assembly, self._stratification_coord)
        assert sorted(source_assembly[self._stratification_coord].values) == \
            sorted(target_assembly[self._stratification_coord].values)

    cross_validation_values, splits = self._split.build_splits(target_assembly)
    progress = tqdm(enumerate_done(splits), total=len(splits),
                    desc='cross-validation')

    split_scores = []
    for split_index, (train_indices, test_indices), done in progress:
        train_values = cross_validation_values[train_indices]
        test_values = cross_validation_values[test_indices]

        # train portion of both assemblies
        train_source = subset(source_assembly, train_values,
                              dims_must_match=False)
        train_target = subset(target_assembly, train_values,
                              dims_must_match=False)
        assert len(train_source[self._split_coord]) == \
            len(train_target[self._split_coord])

        # test portion of both assemblies
        test_source = subset(source_assembly, test_values,
                             dims_must_match=False)
        test_target = subset(target_assembly, test_values,
                             dims_must_match=False)
        assert len(test_source[self._split_coord]) == \
            len(test_target[self._split_coord])

        split_score = yield from self._get_result(
            train_source, train_target, test_source, test_target, done=done)
        # add a length-1 `split` dimension labelled with this split's index
        split_score = split_score.expand_dims('split')
        split_score['split'] = [split_index]
        split_scores.append(split_score)

    yield Score.merge(*split_scores)
def pipe(self, assembly):
    """
    Generator that scores every cross-validation split of `assembly`.

    For each split it delegates to `self._get_result` via `yield from`,
    tags the returned score with the split index, and finally yields all
    split scores merged with `Score.merge`.

    :param assembly: the assembly to cross-validate over
    """
    cross_validation_values, splits = self._split.build_splits(assembly)
    progress = tqdm(enumerate_done(splits), total=len(splits),
                    desc='cross-validation')

    split_scores = []
    for split_index, (train_indices, test_indices), done in progress:
        train_values = cross_validation_values[train_indices]
        test_values = cross_validation_values[test_indices]

        train = subset(assembly, train_values, dims_must_match=False)
        test = subset(assembly, test_values, dims_must_match=False)

        split_score = yield from self._get_result(train, test, done=done)
        # add a length-1 `split` dimension labelled with this split's index
        split_score = split_score.expand_dims('split')
        split_score['split'] = [split_index]
        split_scores.append(split_score)

    yield Score.merge(*split_scores)
def test_category_subselection(self):
    """Subsetting by every unique category name should give back the whole assembly."""
    assembly = get_assembly('dicarlo.MajajHong2015')
    categories = np.unique(assembly['category_name'])

    # placeholder target: one zero per category, stacked into `presentation`
    target = xr.DataArray(
        [0] * len(categories),
        coords={'category_name': categories},
        dims=['category_name'])
    target = target.stack(presentation=['category_name'])

    sub_assembly = subset(assembly, target,
                          repeat=True, dims_must_match=False)
    assert (assembly == sub_assembly).all()
def test_equal(self):
    """Subsetting an assembly against itself should return equal values."""
    values = np.random.rand(100, 3)
    num_images, num_neuroids = values.shape
    assembly = NeuroidAssembly(
        values,
        coords={
            'image_id': list(range(num_images)),
            'neuroid_id': list(range(num_neuroids)),
        },
        dims=['image_id', 'neuroid_id'],
    ).stack(presentation=('image_id',), neuroid=('neuroid_id',))

    subset_assembly = subset(assembly, assembly,
                             subset_dims=('presentation',))
    assert (subset_assembly == assembly).all()