def boot_noise_ceiling(rdms, method='cosine', rdm_descriptor='index'):
    """Estimates a noise ceiling by leave-one-out over rdm groups.

    The lower bound compares each held-out group of rdms to the rdm
    pooled over the remaining groups; the upper bound compares it to
    the rdm pooled over the full dataset.

    Args:
        rdms(rsatoolbox.rdm.RDMs): data to calculate noise ceiling
        method(string): comparison method to use
        rdm_descriptor(string): descriptor to group rdms

    Returns:
        tuple: (lower nc-bound, upper nc-bound)
    """
    _, test_set, ceil_set = sets_leave_one_out_rdm(rdms, rdm_descriptor)
    # prediction from the full dataset -> upper bound
    pred_full = pool_rdm(rdms, method=method)
    lower_vals = []
    upper_vals = []
    for train, test in zip(ceil_set, test_set):
        # prediction from all-but-one groups -> lower bound
        pred_loo = pool_rdm(train[0], method=method)
        lower_vals.append(np.mean(compare(pred_loo, test[0], method)))
        upper_vals.append(np.mean(compare(pred_full, test[0], method)))
    noise_min = np.mean(np.array(lower_vals))
    noise_max = np.mean(np.array(upper_vals))
    return noise_min, noise_max
def _loss(theta, model, data, method='cosine', sigma_k=None,
          pattern_descriptor=None, pattern_idx=None,
          ridge_weight=0):
    """Computes the loss (negative fit plus ridge penalty) for a model
    at a given parameter vector.

    Args:
        theta(numpy.ndarray): evaluated parameter value
        model(Model): the model to be fit
        data(rsatoolbox.rdm.RDMs): data to be fit
        method(String, optional): evaluation metric.
            The default is 'cosine'.
        sigma_k(matrix): pattern-covariance matrix
            used only for whitened distances (ending in _cov)
            to compute the covariance matrix for rdms
        pattern_descriptor (String, optional): descriptor used for fitting.
            The default is None.
        pattern_idx(numpy.ndarray, optional): sampled patterns.
            The default is None.
        ridge_weight(float): weight for a ridge regularisation

    Returns:
        numpy.ndarray: loss
    """
    prediction = model.predict_rdm(theta)
    # restrict the prediction to the sampled patterns when both pieces
    # of subsampling information are available
    if pattern_idx is not None and pattern_descriptor is not None:
        prediction = prediction.subsample_pattern(
            pattern_descriptor, pattern_idx)
    similarity = np.mean(
        compare(prediction, data, method=method, sigma_k=sigma_k))
    penalty = ridge_weight * np.sum(theta * theta)
    return penalty - similarity
def fit_select(model, data, method='cosine', pattern_idx=None,
               pattern_descriptor=None, sigma_k=None):
    """Fits a selection model by evaluating every candidate rdm and
    picking the index of the best-performing one.
    Works only for ModelSelect.

    Args:
        model(rsatoolbox.model.Model): model to be fit
        data(rsatoolbox.rdm.RDMs): Data to fit to
        method(String): Evaluation method
        pattern_idx(numpy.ndarray): Which patterns are sampled
        pattern_descriptor(String): Which descriptor is used
        sigma_k(matrix): pattern-covariance matrix
            used only for whitened distances (ending in _cov)
            to compute the covariance matrix for rdms

    Returns:
        theta(int): parameter vector (index of the selected rdm)
    """
    scores = np.zeros(model.n_rdm)
    for idx in range(model.n_rdm):
        prediction = model.predict_rdm(idx)
        if pattern_idx is not None and pattern_descriptor is not None:
            prediction = prediction.subsample_pattern(
                pattern_descriptor, pattern_idx)
        scores[idx] = np.mean(
            compare(prediction, data, method=method, sigma_k=sigma_k))
    # the selected rdm index acts as the model parameter
    return np.argmax(scores)
def eval_fixed(models, data, theta=None, method='cosine'):
    """Evaluates models on data, without any bootstrapping or
    cross-validation.

    Args:
        models(list of rsatoolbox.model.Model or list): models to be evaluated
        data(rsatoolbox.rdm.RDMs): data to evaluate on
        theta(numpy.ndarray): parameter vector for the models
        method(string): comparison method to use

    Returns:
        float: evaluation
    """
    models, evaluations, theta, _ = input_check_model(models, theta, None, 1)
    # expand to one evaluation per data rdm
    evaluations = np.repeat(np.expand_dims(evaluations, -1), data.n_rdm, -1)
    for idx, model in enumerate(models):
        pred = model.predict_rdm(theta=theta[idx])
        evaluations[idx] = compare(pred, data, method)
    evaluations = evaluations.reshape((1, len(models), data.n_rdm))
    noise_ceil = boot_noise_ceiling(
        data, method=method, rdm_descriptor='index')
    n_rdm = evaluations.shape[-1]
    # variance of the mean evaluation across rdms
    variances = np.cov(evaluations[0], ddof=1) / n_rdm
    dof = n_rdm - 1
    return Result(models, evaluations, method=method, cv_method='fixed',
                  noise_ceiling=noise_ceil, variances=variances, dof=dof)
def eval_bootstrap_rdm(models, data, theta=None, method='cosine', N=1000,
                       rdm_descriptor='index', boot_noise_ceil=True):
    """evaluates models on data
    performs bootstrapping over rdms to get a sampling distribution

    Args:
        models(rsatoolbox.model.Model or list of these): models to be evaluated
        data(rsatoolbox.rdm.RDMs): data to evaluate on
        theta(numpy.ndarray): parameter vector for the models
        method(string): comparison method to use
        N(int): number of samples
        rdm_descriptor(string): rdm_descriptor to group rdms for bootstrap
        boot_noise_ceil(bool): whether to bootstrap the noise ceiling as well

    Returns:
        numpy.ndarray: vector of evaluations
    """
    models, evaluations, theta, _ = input_check_model(models, theta, None, N)
    noise_min = []
    noise_max = []
    for i in tqdm.trange(N):
        sample, _ = bootstrap_sample_rdm(data, rdm_descriptor)
        for j, mod in enumerate(models):
            rdm_pred = mod.predict_rdm(theta=theta[j])
            evaluations[i, j] = np.mean(compare(rdm_pred, sample, method))
        if boot_noise_ceil:
            noise_min_sample, noise_max_sample = boot_noise_ceiling(
                sample, method=method, rdm_descriptor=rdm_descriptor)
            noise_min.append(noise_min_sample)
            noise_max.append(noise_max_sample)
    # bootstrap resampling can produce non-finite evaluations (e.g. when a
    # sample contains only one group); exclude those from the variance
    eval_ok = np.isfinite(evaluations[:, 0])
    if boot_noise_ceil:
        noise_ceil = np.array([noise_min, noise_max])
        # joint covariance of model evaluations and noise-ceiling bounds
        variances = np.cov(np.concatenate([
            evaluations[eval_ok, :].T, noise_ceil[:, eval_ok]]))
    else:
        noise_ceil = np.array(boot_noise_ceiling(
            data, method=method, rdm_descriptor=rdm_descriptor))
        variances = np.cov(evaluations[eval_ok, :].T)
    # BUG FIX: a stray `variances = np.cov(evaluations.T)` after this
    # if/else overwrote the filtered variances, defeating the non-finite
    # filtering and dropping the noise-ceiling covariance rows; removed.
    dof = data.n_rdm - 1
    result = Result(models, evaluations, method=method,
                    cv_method='bootstrap_rdm', noise_ceiling=noise_ceil,
                    variances=variances, dof=dof)
    return result
def test_two_rdms_nan(self):
    """Checks that interpolation, optimization and regression fitting
    agree on a dataset with a duplicated pattern (index 1 appears twice),
    which induces zero/NaN distances, across several comparison methods.
    """
    from rsatoolbox.model import ModelInterpolate, ModelWeighted
    from rsatoolbox.model.fitter import fit_regress, fit_optimize_positive
    from rsatoolbox.rdm import concat, compare
    rdms = self.rdms.subsample_pattern('index', [0, 1, 1, 3, 4, 5])
    model_rdms = concat([rdms[0], rdms[1]])
    model_weighted = ModelWeighted('m_weighted', model_rdms)
    model_interpolate = ModelInterpolate('m_interpolate', model_rdms)
    for i_method in ['cosine', 'corr', 'cosine_cov', 'corr_cov']:
        theta_m_i = model_interpolate.fit(rdms, method=i_method)
        theta_m_w = model_weighted.fit(rdms, method=i_method)
        theta_m_w_pos = fit_optimize_positive(
            model_weighted, rdms, method=i_method)
        theta_m_w_linear = fit_regress(model_weighted, rdms, method=i_method)
        # BUG FIX: the interpolation model's theta must be evaluated with
        # the interpolation model (previously model_weighted.predict_rdm
        # was called with theta_m_i, mixing models and parameters)
        eval_m_i = np.mean(
            compare(model_interpolate.predict_rdm(theta_m_i), rdms,
                    method=i_method))
        eval_m_w = np.mean(
            compare(model_weighted.predict_rdm(theta_m_w), rdms,
                    method=i_method))
        eval_m_w_pos = np.mean(
            compare(model_weighted.predict_rdm(theta_m_w_pos), rdms,
                    method=i_method))
        eval_m_w_linear = np.mean(
            compare(model_weighted.predict_rdm(theta_m_w_linear), rdms,
                    method=i_method))
        self.assertAlmostEqual(
            eval_m_i, eval_m_w_pos, places=4,
            msg='weighted fit differs from interpolation fit!'
            + '\nfor %s' % i_method)
        self.assertAlmostEqual(
            eval_m_w, eval_m_w_linear, places=4,
            msg='regression fit differs from optimization fit!'
            + '\nfor %s' % i_method)
def cv_noise_ceiling(rdms, ceil_set, test_set, method='cosine',
                     pattern_descriptor='index'):
    """ calculates the noise ceiling for crossvalidation.
    The upper bound is calculated by pooling all rdms for the appropriate
    patterns in the testsets.
    the lower bound is calculated by using only the appropriate rdms
    from ceil_set for training.

    Args:
        rdms(rsatoolbox.rdm.RDMs): complete data
        ceil_set(list): a list of the training RDMs with 2-tuple entries:
            (RDMs, pattern_idx)
        test_set(list): a list of the test RDMs with 2-tuple entries:
            (RDMs, pattern_idx)
        method(string): comparison method to use
        pattern_descriptor(string): descriptor to group patterns

    Returns:
        list: lower nc-bound, upper nc-bound
    """
    assert len(ceil_set) == len(test_set), \
        'train_set and test_set must have the same length'
    # PERF: pooling the full dataset does not depend on the fold;
    # previously pool_rdm(rdms, ...) was recomputed inside the loop
    pred_full = pool_rdm(rdms, method=method)
    noise_min = []
    noise_max = []
    for train, test in zip(ceil_set, test_set):
        # lower bound: pool only the fold's training rdms
        pred_train = pool_rdm(train[0], method=method)
        pred_train = pred_train.subsample_pattern(
            by=pattern_descriptor, value=test[1])
        # upper bound: full-data pool restricted to the test patterns
        pred_test = pred_full.subsample_pattern(
            by=pattern_descriptor, value=test[1])
        noise_min.append(np.mean(compare(pred_train, test[0], method)))
        noise_max.append(np.mean(compare(pred_test, test[0], method)))
    noise_min = np.mean(np.array(noise_min))
    noise_max = np.mean(np.array(noise_max))
    return noise_min, noise_max
def test_temporal_rsa(self):
    """Smoke-tests the temporal RSA demo: loads sample MEG data, builds a
    TemporalDataset, exercises split/subset/bin/convert operations,
    computes rdm movies, and plots model comparisons.
    """
    import numpy as np
    import matplotlib.pyplot as plt
    import rsatoolbox
    import pickle
    from rsatoolbox.rdm import calc_rdm_movie
    import os
    # locate the demo data relative to this test file
    path = os.path.dirname(os.path.abspath(__file__))
    dat = pickle.load(
        open(
            os.path.join(path, '..', 'demos',
                         "TemporalSampleData", "meg_sample_data.pkl"),
            "rb"))
    measurements = dat['data']
    cond_names = [x for x in dat['cond_names'].keys()]
    cond_idx = dat['cond_idx']
    channel_names = dat['channel_names']
    times = dat['times']
    print(
        'there are %d observations (trials), %d channels, and %d time-points\n'
        % (measurements.shape))
    print('conditions:')
    print(cond_names)
    # plot the trial-averaged time course of the first two channels
    fig, ax = plt.subplots(1, 2, figsize=(12, 4))
    ax = ax.flatten()
    for jj, chan in enumerate(channel_names[:2]):
        for ii, cond_ii in enumerate(np.unique(cond_idx)):
            # mean across trials of this condition for channel jj
            mn = measurements[cond_ii == cond_idx, jj, :].mean(0).squeeze()
            ax[jj].plot(times, mn, label=cond_names[ii])
        ax[jj].set_title(chan)
        ax[jj].legend()
    # assemble descriptors and build the temporal dataset
    tim_des = {'time': times}
    des = {'session': 0, 'subj': 0}
    obs_des = {'conds': cond_idx}
    chn_des = {'channels': channel_names}
    data = rsatoolbox.data.TemporalDataset(measurements,
                                           descriptors=des,
                                           obs_descriptors=obs_des,
                                           channel_descriptors=chn_des,
                                           time_descriptors=tim_des)
    data.sort_by('conds')
    print('shape of original measurements')
    print(data.measurements.shape)
    # split into one dataset per time point
    data_split_time = data.split_time('time')
    print('\nafter splitting')
    print(len(data_split_time))
    print(data_split_time[0].measurements.shape)
    print('shape of original measurements')
    print(data.measurements.shape)
    # keep only the -0.1 s .. 0.5 s window
    data_subset_time = data.subset_time('time', t_from=-.1, t_to=.5)
    print('\nafter subsetting')
    print(data_subset_time.measurements.shape)
    print(data_subset_time.time_descriptors['time'][0])
    # pair up consecutive time points into bins of two samples each
    bins = np.reshape(tim_des['time'], [-1, 2])
    print(len(bins))
    print(bins[0])
    print('shape of original measurements')
    print(data.measurements.shape)
    data_binned = data.bin_time('time', bins=bins)
    print('\nafter binning')
    print(data_binned.measurements.shape)
    print(data_binned.time_descriptors['time'][0])
    print('shape of original measurements')
    print(data.measurements.shape)
    # flatten time into observations as a plain Dataset
    # NOTE(review): the message below says 'after binning' but this
    # section is about dataset conversion — likely a copy-paste leftover
    data_dataset = data.convert_to_dataset('time')
    print('\nafter binning')
    print(data_dataset.measurements.shape)
    print(data_dataset.obs_descriptors['time'][0])
    # rdm movie: one rdm per time point / per bin
    rdms_data = calc_rdm_movie(data, method='euclidean',
                               descriptor='conds')
    print(rdms_data)
    rdms_data_binned = calc_rdm_movie(data, method='euclidean',
                                      descriptor='conds', bins=bins)
    print(rdms_data_binned)
    plt.figure(figsize=(10, 15))
    # add formatted time as rdm_descriptor
    rdms_data_binned.rdm_descriptors['time_formatted'] = [
        '%0.0f ms' % (np.round(x * 1000, 2))
        for x in rdms_data_binned.rdm_descriptors['time']
    ]
    rsatoolbox.vis.show_rdm(rdms_data_binned,
                            pattern_descriptor='conds',
                            rdm_descriptor='time_formatted')
    from rsatoolbox.rdm import get_categorical_rdm
    # three candidate model rdms for the 4 conditions
    rdms_model_in = get_categorical_rdm(['%d' % x for x in range(4)])
    rdms_model_lr = get_categorical_rdm(['l', 'r', 'l', 'r'])
    rdms_model_av = get_categorical_rdm(['a', 'a', 'v', 'v'])
    model_names = ['independent', 'left/right', 'audio/visual']
    # append in one RDMs object
    model_rdms = rdms_model_in
    model_rdms.append(rdms_model_lr)
    model_rdms.append(rdms_model_av)
    model_rdms.rdm_descriptors['model_names'] = model_names
    model_rdms.pattern_descriptors['cond_names'] = cond_names
    plt.figure(figsize=(10, 10))
    rsatoolbox.vis.show_rdm(model_rdms,
                            rdm_descriptor='model_names',
                            pattern_descriptor='cond_names')
    from rsatoolbox.rdm import compare
    # compare each model rdm to the binned data rdms over time
    r = []
    for mod in model_rdms:
        r.append(compare(mod, rdms_data_binned, method='cosine'))
    for i, r_ in enumerate(r):
        plt.plot(rdms_data_binned.rdm_descriptors['time'],
                 r_.squeeze(), label=model_names[i])
    plt.xlabel('time')
    plt.ylabel('model-data cosine similarity')
    plt.legend()
def crossval(models, rdms, train_set, test_set, ceil_set=None,
             method='cosine', fitter=None, pattern_descriptor='index',
             calc_noise_ceil=True):
    """evaluates models on cross-validation sets

    Fits each model on every training fold and evaluates its prediction
    on the corresponding test patterns.

    Args:
        models(rsatoolbox.model.Model): models to be evaluated
        rdms(rsatoolbox.rdm.RDMs): full dataset
        train_set(list): a list of the training RDMs with 2-tuple entries:
            (RDMs, pattern_idx)
        test_set(list): a list of the test RDMs with 2-tuple entries:
            (RDMs, pattern_idx)
        ceil_set(list, optional): training RDMs for the noise ceiling,
            same 2-tuple format; if None the ceiling is bootstrapped
            per fold instead
        method(string): comparison method to use
        fitter: fitting function(s) for the models (checked/expanded by
            input_check_model)
        pattern_descriptor(string): descriptor to group patterns
        calc_noise_ceil(bool): whether to compute a noise ceiling

    Returns:
        Result: evaluation result object
    """
    assert len(train_set) == len(test_set), \
        'train_set and test_set must have the same length'
    if ceil_set is not None:
        assert len(ceil_set) == len(test_set), \
            'ceil_set and test_set must have the same length'
    if isinstance(models, Model):
        models = [models]
    evaluations = []
    noise_ceil = []
    for i in range(len(train_set)):
        train = train_set[i]
        test = test_set[i]
        # degenerate folds (no rdms or too few conditions) get NaN evals
        if (train[0].n_rdm == 0 or test[0].n_rdm == 0
                or train[0].n_cond <= 2 or test[0].n_cond <= 2):
            evals = np.empty(len(models)) * np.nan
        else:
            models, evals, _, fitter = \
                input_check_model(models, None, fitter)
            for j, model in enumerate(models):
                # fit on the training patterns, evaluate on the test patterns
                theta = fitter[j](model, train[0], method=method,
                                  pattern_idx=train[1],
                                  pattern_descriptor=pattern_descriptor)
                pred = model.predict_rdm(theta)
                pred = pred.subsample_pattern(by=pattern_descriptor,
                                              value=test[1])
                evals[j] = np.mean(compare(pred, test[0], method))
            if ceil_set is None and calc_noise_ceil:
                # no dedicated ceiling set: bootstrap a ceiling on the
                # test patterns of this fold
                noise_ceil.append(
                    boot_noise_ceiling(rdms.subsample_pattern(
                        by=pattern_descriptor, value=test[1]),
                        method=method))
        evaluations.append(evals)
    evaluations = np.array(evaluations).T  # .T to switch models/set order
    evaluations = evaluations.reshape((1, len(models), len(train_set)))
    if ceil_set is not None and calc_noise_ceil:
        noise_ceil = cv_noise_ceiling(rdms, ceil_set, test_set,
                                      method=method,
                                      pattern_descriptor=pattern_descriptor)
    elif calc_noise_ceil:
        noise_ceil = np.array(noise_ceil).T
    else:
        noise_ceil = np.array([np.nan, np.nan])
    result = Result(models, evaluations, method=method,
                    cv_method='crossvalidation',
                    noise_ceiling=noise_ceil)
    return result