def eval_fixed(models, data, theta=None, method='cosine'):
    """evaluates models on data, without any bootstrapping or
    cross-validation

    Args:
        models(pyrsa.model.Model or list of pyrsa.model.Model): models to
            be evaluated
        data(pyrsa.rdm.RDMs): data to evaluate on
        theta(numpy.ndarray): parameter vector for the models
        method(string): comparison method to use

    Returns:
        pyrsa.inference.Result: evaluation results

    """
    evaluations, theta, _ = input_check_model(models, theta, None, 1)
    if isinstance(models, Model):
        rdm_pred = models.predict_rdm(theta=theta)
        evaluations = np.array([[compare(rdm_pred, data, method)[0]]])
    elif isinstance(models, Iterable):
        for k in range(len(models)):
            rdm_pred = models[k].predict_rdm(theta=theta[k])
            evaluations[k] = np.mean(compare(rdm_pred, data, method)[0])
        evaluations = evaluations.reshape((1, len(models)))
    else:
        raise ValueError('models should be a pyrsa.model.Model or a list of'
                         + ' such objects')
    noise_ceil = boot_noise_ceiling(data, method=method,
                                    rdm_descriptor='index')
    result = Result(models, evaluations, method=method,
                    cv_method='fixed', noise_ceiling=noise_ceil)
    return result
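
# Example usage (a minimal sketch, not from this module): `model_rdm` and
# `data_rdms` are hypothetical names for a predicted RDM and a
# pyrsa.rdm.RDMs data object, assumed to be built elsewhere.
#
#     fixed_model = pyrsa.model.ModelFixed('my_model', model_rdm)
#     result = eval_fixed(fixed_model, data_rdms, method='corr')
#     print(result.evaluations)    # shape (1, n_models)
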
def boot_noise_ceiling(rdms, method='cosine', rdm_descriptor='index'):
    """ calculates a noise ceiling by leave one out & full set

    Args:
        rdms(pyrsa.rdm.RDMs): data to calculate noise ceiling
        method(string): comparison method to use
        rdm_descriptor(string): descriptor to group rdms

    Returns:
        tuple: (lower nc-bound, upper nc-bound)

    """
    _, test_set, ceil_set = sets_leave_one_out_rdm(rdms, rdm_descriptor)
    pred_test = pool_rdm(rdms, method=method)
    noise_min = []
    noise_max = []
    for i in range(len(ceil_set)):
        train = ceil_set[i]
        test = test_set[i]
        pred_train = pool_rdm(train[0], method=method)
        noise_min.append(np.mean(compare(pred_train, test[0], method)))
        noise_max.append(np.mean(compare(pred_test, test[0], method)))
    noise_min = np.mean(np.array(noise_min))
    noise_max = np.mean(np.array(noise_max))
    return noise_min, noise_max
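
# Example usage (a sketch; `data_rdms` is a hypothetical pyrsa.rdm.RDMs
# object with a 'subject' rdm_descriptor). Grouping by subject makes the
# leave-one-out folds leave out one subject's rdms at a time:
#
#     lower, upper = boot_noise_ceiling(data_rdms, method='cosine',
#                                       rdm_descriptor='subject')
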
def eval_bootstrap_rdm(models, data, theta=None, method='cosine', N=1000,
                       rdm_descriptor='index', boot_noise_ceil=True):
    """evaluates models on data
    performs bootstrapping over rdms to get a sampling distribution

    Args:
        models(pyrsa.model.Model or list): models to be evaluated
        data(pyrsa.rdm.RDMs): data to evaluate on
        theta(numpy.ndarray): parameter vector for the models
        method(string): comparison method to use
        N(int): number of samples
        rdm_descriptor(string): rdm_descriptor to group rdms for bootstrap
        boot_noise_ceil(bool): whether to bootstrap the noise ceiling

    Returns:
        pyrsa.inference.Result: bootstrap evaluation results

    """
    evaluations, theta, _ = input_check_model(models, theta, None, N)
    noise_min = []
    noise_max = []
    for i in tqdm.trange(N):
        sample, rdm_idx = bootstrap_sample_rdm(data, rdm_descriptor)
        if isinstance(models, Model):
            rdm_pred = models.predict_rdm(theta=theta)
            evaluations[i] = np.mean(compare(rdm_pred, sample, method))
        elif isinstance(models, Iterable):
            for j, mod in enumerate(models):
                rdm_pred = mod.predict_rdm(theta=theta[j])
                evaluations[i, j] = np.mean(compare(rdm_pred, sample,
                                                    method))
        if boot_noise_ceil:
            noise_min_sample, noise_max_sample = boot_noise_ceiling(
                sample, method=method, rdm_descriptor=rdm_descriptor)
            noise_min.append(noise_min_sample)
            noise_max.append(noise_max_sample)
    if isinstance(models, Model):
        evaluations = evaluations.reshape((N, 1))
    if boot_noise_ceil:
        noise_ceil = np.array([noise_min, noise_max])
    else:
        noise_ceil = np.array(boot_noise_ceiling(
            data, method=method, rdm_descriptor=rdm_descriptor))
    result = Result(models, evaluations, method=method,
                    cv_method='bootstrap_rdm', noise_ceiling=noise_ceil)
    return result
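
# Example usage (a sketch under the same hypothetical names as above):
# bootstrap resampling over rdms yields a sampling distribution of N
# evaluations per model, rather than a single point estimate.
#
#     result = eval_bootstrap_rdm(models, data_rdms, N=1000,
#                                 rdm_descriptor='subject')
#     print(result.evaluations.shape)    # (N, n_models)
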
def fit_select(model, data, method='cosine', pattern_idx=None,
               pattern_descriptor=None):
    """ fits selection models by evaluating each rdm and selecting the one
    with the best performance. Works only for ModelSelect

    Args:
        model(pyrsa.model.Model): model to be fit
        data(pyrsa.rdm.RDMs): Data to fit to
        method(String): Evaluation method
        pattern_idx(numpy.ndarray): Which patterns are sampled
        pattern_descriptor(String): Which descriptor is used

    Returns:
        theta(int): index of the best-performing rdm

    """
    evaluations = np.zeros(model.n_rdm)
    for i_rdm in range(model.n_rdm):
        pred = model.predict_rdm(i_rdm)
        if not (pattern_idx is None or pattern_descriptor is None):
            pred = pred.subsample_pattern(pattern_descriptor, pattern_idx)
        evaluations[i_rdm] = np.mean(compare(pred, data, method=method))
    theta = np.argmax(evaluations)
    return theta
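
# Example usage (a sketch; `candidate_rdms` is a hypothetical
# pyrsa.rdm.RDMs object holding the candidate rdms of a selection model).
# The returned theta indexes the best-scoring candidate:
#
#     sel_model = pyrsa.model.ModelSelect('selection', candidate_rdms)
#     theta = fit_select(sel_model, data_rdms, method='cosine')
#     best_rdm = sel_model.predict_rdm(theta)
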
def _loss(theta, model, data, method='cosine', cov=None,
          pattern_descriptor=None, pattern_idx=None):
    """Method for calculating a loss for a model and parameter combination

    Args:
        theta(numpy.ndarray): evaluated parameter value
        model(Model): the model to be fit
        data(pyrsa.rdm.RDMs): data to be fit
        method(String, optional): evaluation metric.
            The default is 'cosine'.
        pattern_idx(numpy.ndarray, optional): sampled patterns.
            The default is None.
        pattern_descriptor(String, optional): descriptor used for fitting.
            The default is None.
        cov(numpy.ndarray, optional): Covariance matrix for likelihood-based
            evaluation. It is ignored otherwise. The default is None.

    Returns:
        numpy.ndarray: loss

    """
    pred = model.predict_rdm(theta)
    if not (pattern_idx is None or pattern_descriptor is None):
        pred = pred.subsample_pattern(pattern_descriptor, pattern_idx)
    return -np.mean(compare(pred, data, method=method))
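
# The loss is the negative mean model-data similarity, so minimizing it
# maximizes the fit. A sketch of how a generic optimizer could use it
# (scipy.optimize and the starting value `theta0` are assumptions here,
# not requirements of this function):
#
#     from scipy.optimize import minimize
#     theta0 = np.zeros(model.n_param)    # hypothetical start value
#     res = minimize(_loss, theta0, args=(model, data_rdms))
#     theta_hat = res.x
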
def cv_noise_ceiling(rdms, ceil_set, test_set, method='cosine',
                     pattern_descriptor='index'):
    """ calculates the noise ceiling for crossvalidation.
    The upper bound is calculated by pooling all rdms for the appropriate
    patterns in the test sets. The lower bound is calculated by using only
    the appropriate rdms from ceil_set for training.

    Args:
        rdms(pyrsa.rdm.RDMs): complete data
        ceil_set(list): a list of the training RDMs with 2-tuple entries:
            (RDMs, pattern_idx)
        test_set(list): a list of the test RDMs with 2-tuple entries:
            (RDMs, pattern_idx)
        method(string): comparison method to use
        pattern_descriptor(string): descriptor to group patterns

    Returns:
        tuple: (lower nc-bound, upper nc-bound)

    """
    assert len(ceil_set) == len(test_set), \
        'ceil_set and test_set must have the same length'
    noise_min = []
    noise_max = []
    for i in range(len(ceil_set)):
        train = ceil_set[i]
        test = test_set[i]
        pred_train = pool_rdm(train[0], method=method)
        pred_train = pred_train.subsample_pattern(by=pattern_descriptor,
                                                  value=test[1])
        pred_test = pool_rdm(rdms, method=method)
        pred_test = pred_test.subsample_pattern(by=pattern_descriptor,
                                                value=test[1])
        noise_min.append(np.mean(compare(pred_train, test[0], method)))
        noise_max.append(np.mean(compare(pred_test, test[0], method)))
    noise_min = np.mean(np.array(noise_min))
    noise_max = np.mean(np.array(noise_max))
    return noise_min, noise_max
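
# Example usage (a sketch): ceil_set and test_set would typically come from
# a set-generating helper; `sets_k_fold` and its keyword arguments are an
# assumption about the surrounding package, not guaranteed by this module.
#
#     train_set, test_set, ceil_set = sets_k_fold(data_rdms, k_rdm=3,
#                                                 k_pattern=4)
#     lower, upper = cv_noise_ceiling(data_rdms, ceil_set, test_set,
#                                     method='cosine',
#                                     pattern_descriptor='index')
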
def test_temporal_rsa(self):
    import os
    import pickle
    import numpy as np
    import matplotlib.pyplot as plt
    import pyrsa
    from pyrsa.rdm import calc_rdm_movie

    path = os.path.dirname(os.path.abspath(__file__))
    dat = pickle.load(open(
        os.path.join(path, '..', 'demos', 'TemporalSampleData',
                     'meg_sample_data.pkl'), 'rb'))
    measurements = dat['data']
    cond_names = [x for x in dat['cond_names'].keys()]
    cond_idx = dat['cond_idx']
    channel_names = dat['channel_names']
    times = dat['times']
    print('there are %d observations (trials), %d channels, '
          'and %d time-points\n' % (measurements.shape))
    print('conditions:')
    print(cond_names)

    # plot the mean time course per condition for the first two channels
    fig, ax = plt.subplots(1, 2, figsize=(12, 4))
    ax = ax.flatten()
    for jj, chan in enumerate(channel_names[:2]):
        for ii, cond_ii in enumerate(np.unique(cond_idx)):
            mn = measurements[cond_ii == cond_idx, jj, :].mean(0).squeeze()
            ax[jj].plot(times, mn, label=cond_names[ii])
        ax[jj].set_title(chan)
        ax[jj].legend()

    tim_des = {'time': times}
    des = {'session': 0, 'subj': 0}
    obs_des = {'conds': cond_idx}
    chn_des = {'channels': channel_names}
    data = pyrsa.data.TemporalDataset(measurements,
                                      descriptors=des,
                                      obs_descriptors=obs_des,
                                      channel_descriptors=chn_des,
                                      time_descriptors=tim_des)
    data.sort_by('conds')

    print('shape of original measurements')
    print(data.measurements.shape)
    data_split_time = data.split_time('time')
    print('\nafter splitting')
    print(len(data_split_time))
    print(data_split_time[0].measurements.shape)

    print('shape of original measurements')
    print(data.measurements.shape)
    data_subset_time = data.subset_time('time', t_from=-.1, t_to=.5)
    print('\nafter subsetting')
    print(data_subset_time.measurements.shape)
    print(data_subset_time.time_descriptors['time'][0])

    bins = np.reshape(tim_des['time'], [-1, 2])
    print(len(bins))
    print(bins[0])
    print('shape of original measurements')
    print(data.measurements.shape)
    data_binned = data.bin_time('time', bins=bins)
    print('\nafter binning')
    print(data_binned.measurements.shape)
    print(data_binned.time_descriptors['time'][0])

    print('shape of original measurements')
    print(data.measurements.shape)
    data_dataset = data.convert_to_dataset('time')
    print('\nafter conversion to a dataset')
    print(data_dataset.measurements.shape)
    print(data_dataset.obs_descriptors['time'][0])

    rdms_data = calc_rdm_movie(data, method='euclidean',
                               descriptor='conds')
    print(rdms_data)
    rdms_data_binned = calc_rdm_movie(data, method='euclidean',
                                      descriptor='conds', bins=bins)
    print(rdms_data_binned)

    plt.figure(figsize=(10, 15))
    # add formatted time as rdm_descriptor
    rdms_data_binned.rdm_descriptors['time_formatted'] = [
        '%0.0f ms' % (np.round(x * 1000, 2))
        for x in rdms_data_binned.rdm_descriptors['time']]
    pyrsa.vis.show_rdm(rdms_data_binned,
                       do_rank_transform=False,
                       pattern_descriptor='conds',
                       rdm_descriptor='time_formatted')

    from pyrsa.rdm import get_categorical_rdm
    rdms_model_in = get_categorical_rdm(['%d' % x for x in range(4)])
    rdms_model_lr = get_categorical_rdm(['l', 'r', 'l', 'r'])
    rdms_model_av = get_categorical_rdm(['a', 'a', 'v', 'v'])
    model_names = ['independent', 'left/right', 'audio/visual']
    # append all model RDMs into one RDMs object
    model_rdms = rdms_model_in
    model_rdms.append(rdms_model_lr)
    model_rdms.append(rdms_model_av)
    model_rdms.rdm_descriptors['model_names'] = model_names
    model_rdms.pattern_descriptors['cond_names'] = cond_names
    plt.figure(figsize=(10, 10))
    pyrsa.vis.show_rdm(model_rdms, rdm_descriptor='model_names',
                       pattern_descriptor='cond_names')

    from pyrsa.rdm import compare
    r = []
    for mod in model_rdms:
        r.append(compare(mod, rdms_data_binned, method='cosine'))
    for i, r_ in enumerate(r):
        plt.plot(rdms_data_binned.rdm_descriptors['time'], r_.squeeze(),
                 label=model_names[i])
    plt.xlabel('time')
    plt.ylabel('model-data cosine similarity')
    plt.legend()
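
# A possible follow-up (a sketch, not part of the test): each categorical
# model rdm could be wrapped in a pyrsa.model.ModelFixed and scored against
# the binned data rdms with an evaluation function such as eval_fixed; the
# subset call and names below are assumptions for illustration.
#
#     for name in model_names:
#         mod_rdm = model_rdms.subset('model_names', name)
#         m = pyrsa.model.ModelFixed(name, mod_rdm)
#         res = pyrsa.inference.eval_fixed(m, rdms_data_binned,
#                                          method='cosine')
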
def crossval(models, rdms, train_set, test_set, ceil_set=None,
             method='cosine', fitter=None, pattern_descriptor='index'):
    """evaluates models on cross-validation sets

    Args:
        models(pyrsa.model.Model or list): models to be evaluated
        rdms(pyrsa.rdm.RDMs): full dataset
        train_set(list): a list of the training RDMs with 2-tuple entries:
            (RDMs, pattern_idx)
        test_set(list): a list of the test RDMs with 2-tuple entries:
            (RDMs, pattern_idx)
        ceil_set(list, optional): a list of RDMs for the noise ceiling with
            2-tuple entries: (RDMs, pattern_idx)
        method(string): comparison method to use
        fitter(function or list of functions, optional): fitting function(s)
            for the models
        pattern_descriptor(string): descriptor to group patterns

    Returns:
        pyrsa.inference.Result: evaluation results

    """
    assert len(train_set) == len(test_set), \
        'train_set and test_set must have the same length'
    if ceil_set is not None:
        assert len(ceil_set) == len(test_set), \
            'ceil_set and test_set must have the same length'
    evaluations = []
    noise_ceil = []
    for i in range(len(train_set)):
        train = train_set[i]
        test = test_set[i]
        if (train[0].n_rdm == 0 or test[0].n_rdm == 0
                or train[0].n_cond <= 2 or test[0].n_cond <= 2):
            if isinstance(models, Model):
                evals = np.nan
            elif isinstance(models, Iterable):
                evals = np.empty(len(models)) * np.nan
        else:
            if isinstance(models, Model):
                if fitter is None:
                    fitter = models.default_fitter
                theta = fitter(models, train[0], method=method,
                               pattern_idx=train[1],
                               pattern_descriptor=pattern_descriptor)
                pred = models.predict_rdm(theta)
                pred = pred.subsample_pattern(by=pattern_descriptor,
                                              value=test[1])
                evals = np.mean(compare(pred, test[0], method))
            elif isinstance(models, Iterable):
                evals, _, fitter = input_check_model(models, None, fitter)
                for j in range(len(models)):
                    theta = fitter[j](
                        models[j], train[0], method=method,
                        pattern_idx=train[1],
                        pattern_descriptor=pattern_descriptor)
                    pred = models[j].predict_rdm(theta)
                    pred = pred.subsample_pattern(by=pattern_descriptor,
                                                  value=test[1])
                    evals[j] = np.mean(compare(pred, test[0], method))
            if ceil_set is None:
                noise_ceil.append(boot_noise_ceiling(
                    rdms.subsample_pattern(by=pattern_descriptor,
                                           value=test[1]),
                    method=method))
        evaluations.append(evals)
    if isinstance(models, Model):
        models = [models]
    evaluations = np.array(evaluations).T  # .T to switch model/set order
    evaluations = evaluations.reshape((1, len(models), len(train_set)))
    if ceil_set is not None:
        noise_ceil = cv_noise_ceiling(rdms, ceil_set, test_set,
                                      method=method,
                                      pattern_descriptor=pattern_descriptor)
    else:
        noise_ceil = np.array(noise_ceil).T
    result = Result(models, evaluations, method=method,
                    cv_method='crossvalidation', noise_ceiling=noise_ceil)
    return result
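
# Example usage (a sketch; the split helper and its keyword arguments are
# assumptions, as above, and the sets follow the (RDMs, pattern_idx)
# convention documented in the docstring):
#
#     train_set, test_set, ceil_set = sets_k_fold(data_rdms, k_rdm=2,
#                                                 k_pattern=5)
#     result = crossval(models, data_rdms, train_set, test_set,
#                       ceil_set=ceil_set, method='cosine')
#     print(result.evaluations.shape)    # (1, n_models, n_folds)
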
# data_burst = np.empty([len(area_data_burstprop),
#                        list(torch.flatten(area_data_burstprop[0]).shape)[0]])
# move each tensor to the CPU and flatten it into one row of the data matrix
for i in range(len(area_data_ann)):
    area_data_ann[i] = area_data_ann[i].cpu()
    # area_data_hebb[i] = area_data_hebb[i].cpu()
    # area_data_burstprop[i] = area_data_burstprop[i].cpu()
    data_ann[i, :] = torch.flatten(area_data_ann[i]).detach().numpy()
    # data_hebb[i, :] = torch.flatten(area_data_hebb[i]).detach().numpy()
    # data_burst[i, :] = torch.flatten(area_data_burstprop[i]).detach().numpy()

# build a Dataset and compute the model RDM
dataset_ann = data.dataset.Dataset(data_ann)
rdm_ann = rdm.calc.calc_rdm(dataset_ann)
# dataset_hebb = data.dataset.Dataset(data_hebb)
# rdm_hebb = rdm.calc.calc_rdm(dataset_hebb)
# dataset_burst = data.dataset.Dataset(data_burst)
# rdm_burst = rdm.calc.calc_rdm(dataset_burst)

# compare the model RDM to the brain-area RDM
similarity_ann_area = rdm.compare(rdm_area, rdm_ann)
# similarity_hebb_area = rdm.compare(rdm_area, rdm_hebb)
# similarity_burst_area = rdm.compare(rdm_area, rdm_burst)

rsa = [rdm_ann, similarity_ann_area]
# rsa_hebb = [rdm_hebb, similarity_hebb_area]
# rsa_burst = [rdm_burst, similarity_burst_area]
# rsa_all = [rsa_ann, rsa_hebb, rsa_burst]
brain_area_rsa.update({ar: rsa})

# save the accumulated per-area results
a_file = open('./' + model + '/final_model_data/' + model
              + '_brain_area_rsa.pkl', 'wb')
pickle.dump(brain_area_rsa, a_file)
a_file.close()
print('Completed RSA for model: ' + model)
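
# A sketch for loading the saved results back in a later session (same
# hypothetical `model` and `ar` names as above; each entry is the
# [rdm, similarity] pair stored per brain area):
#
#     with open('./' + model + '/final_model_data/' + model
#               + '_brain_area_rsa.pkl', 'rb') as f:
#         brain_area_rsa = pickle.load(f)
#     rdm_ann, similarity_ann_area = brain_area_rsa[ar]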