def calc_rdm_correlation(dataset, descriptor=None):
    """
    calculates an RDM from an input dataset using correlation distance

    Each pattern (row of the measurement matrix) is mean-centered and scaled
    to unit length; the dissimilarity of two patterns is then one minus
    their inner product.
    If multiple instances of the same condition are found in the dataset
    they are averaged.

    Args:
        dataset (pyrsa.data.DatasetBase):
            The dataset the RDM is computed from
        descriptor (String):
            obs_descriptor used to define the rows/columns of the RDM
            defaults to one row/column per row in the dataset

    Returns:
        pyrsa.rdm.rdms.RDMs: RDMs object with the one RDM

    """
    patterns, desc, descriptor = _parse_input(dataset, descriptor)
    # remove the mean of every pattern across its columns
    row_means = patterns.mean(axis=1, keepdims=True)
    centered = patterns - row_means
    # scale every pattern to unit euclidean norm
    row_norms = np.sqrt(np.einsum('ij,ij->i', centered, centered))
    centered /= row_norms[:, None]
    # inner products of unit-norm, centered patterns are correlations
    similarity = np.einsum('ik,jk', centered, centered)
    distance = 1 - similarity
    result = RDMs(
        dissimilarities=np.array([distance]),
        dissimilarity_measure='correlation',
        descriptors=dataset.descriptors,
    )
    result.pattern_descriptors[descriptor] = desc
    return result
def calc_rdm_poisson(dataset, descriptor=None, prior_lambda=1,
                     prior_weight=0.1):
    """
    calculates an RDM from an input dataset using the symmetrized
    KL-divergence assuming a poisson distribution.
    If multiple instances of the same condition are found in the dataset
    they are averaged.

    Args:
        dataset (pyrsa.data.DatasetBase):
            The dataset the RDM is computed from
        descriptor (String):
            obs_descriptor used to define the rows/columns of the RDM
            defaults to one row/column per row in the dataset
        prior_lambda:
            multiplied with prior_weight to form the constant that is added
            to the measurements and by which the sum is then divided
        prior_weight:
            see prior_lambda; only the product of the two enters the
            computation

    Returns:
        pyrsa.rdm.rdms.RDMs: RDMs object with the one RDM

    """
    rates, desc, descriptor = _parse_input(dataset, descriptor)
    # regularize: (m + lambda * weight) / (lambda * weight)
    prior = prior_lambda * prior_weight
    rates = (rates + prior) / prior
    n_columns = rates.shape[1]
    pair_diff = _calc_pairwise_differences(rates)
    pair_diff_log = _calc_pairwise_differences(np.log(rates))
    # row-wise inner product of differences and log-differences
    divergence = np.einsum('ij,ij->i', pair_diff, pair_diff_log) / n_columns
    result = RDMs(
        dissimilarities=np.array([divergence]),
        dissimilarity_measure='poisson',
        descriptors=dataset.descriptors,
    )
    result.pattern_descriptors[descriptor] = desc
    return result
def calc_rdm_euclid(dataset, descriptor=None):
    """
    calculates an RDM from an input dataset using euclidean distance

    The value stored for each pair is the sum of squared entries of the
    pairwise difference (as returned by _calc_pairwise_differences),
    divided by the number of columns of the measurement matrix.
    If multiple instances of the same condition are found in the dataset
    they are averaged.

    Args:
        dataset (pyrsa.data.DatasetBase):
            The dataset the RDM is computed from
        descriptor (String):
            obs_descriptor used to define the rows/columns of the RDM
            defaults to one row/column per row in the dataset

    Returns:
        pyrsa.rdm.rdms.RDMs: RDMs object with the one RDM

    """
    patterns, desc, descriptor = _parse_input(dataset, descriptor)
    n_columns = patterns.shape[1]
    pair_diff = _calc_pairwise_differences(patterns)
    squared_dist = np.einsum('ij,ij->i', pair_diff, pair_diff) / n_columns
    result = RDMs(
        dissimilarities=np.array([squared_dist]),
        dissimilarity_measure='euclidean',
        descriptors=dataset.descriptors,
    )
    result.pattern_descriptors[descriptor] = desc
    return result
def calc_rdm_mahalanobis(dataset, descriptor=None, noise=None):
    """
    calculates an RDM from an input dataset using mahalanobis distance
    If multiple instances of the same condition are found in the dataset
    they are averaged.

    Args:
        dataset (pyrsa.data.DatasetBase):
            The dataset the RDM is computed from
        descriptor (String):
            obs_descriptor used to define the rows/columns of the RDM
            defaults to one row/column per row in the dataset
        noise (numpy.ndarray):
            dataset.n_channel x dataset.n_channel
            precision matrix used to calculate the RDM
            (passed through _check_noise before use)

    Returns:
        RDMs object with the one RDM

    """
    patterns, desc, descriptor = _parse_input(dataset, descriptor)
    noise = _check_noise(noise, dataset.n_channel)
    n_patterns, n_columns = patterns.shape[0], patterns.shape[1]
    # contrast matrix from allpairs turns patterns into pairwise differences
    contrasts = allpairs(np.arange(n_patterns))
    pair_diff = np.matmul(contrasts, patterns)
    # precision-weighted differences: (noise @ diff^T)^T
    weighted_diff = np.matmul(noise, pair_diff.T).T
    distance = np.einsum('ij,ij->i', pair_diff, weighted_diff) / n_columns
    result = RDMs(
        dissimilarities=np.array([distance]),
        dissimilarity_measure='Mahalanobis',
        descriptors=dataset.descriptors,
    )
    result.pattern_descriptors[descriptor] = desc
    result.descriptors['noise'] = noise
    return result
def calc_rdm_poisson_cv(dataset, descriptor=None, prior_lambda=1,
                        prior_weight=0.1, cv_descriptor=None):
    """
    calculates an RDM from an input dataset using the crossvalidated
    symmetrized KL-divergence assuming a poisson distribution

    For every crossvalidation fold the differences of the training
    measurements (all other folds) are multiplied with the differences of
    the log test measurements (the fold itself). The returned RDM is the
    mean of these per-fold estimates.

    To assert equal ordering in the folds the dataset is initially
    sorted according to the descriptor used to define the patterns.
    Note that this calls ``dataset.sort_by`` on the passed dataset and,
    when no cv_descriptor is given, stores a generated one under
    ``dataset.obs_descriptors['cv_desc']``.

    Args:
        dataset (pyrsa.data.DatasetBase):
            The dataset the RDM is computed from
        descriptor (String):
            obs_descriptor used to define the rows/columns of the RDM.
            Required: passing None raises a ValueError.
        prior_lambda:
            multiplied with prior_weight to form the constant that is added
            to the fold averages and by which the sum is then divided
        prior_weight:
            see prior_lambda; only the product of the two enters the
            computation
        cv_descriptor (str): The descriptor that indicates the folds
            to use for crossvalidation. If None, a default is generated
            with _gen_default_cv_descriptor.

    Returns:
        pyrsa.rdm.rdms.RDMs: RDMs object with the one RDM

    Raises:
        ValueError: if descriptor is None or the dataset yields no
            crossvalidation folds.

    """
    if descriptor is None:
        raise ValueError('descriptor must be a string! Crossvalidation '
                         'requires multiple measurements to be grouped')
    if cv_descriptor is None:
        cv_desc = _gen_default_cv_descriptor(dataset, descriptor)
        dataset.obs_descriptors['cv_desc'] = cv_desc
        cv_descriptor = 'cv_desc'
    dataset.sort_by(descriptor)
    cv_folds = np.unique(np.array(dataset.obs_descriptors[cv_descriptor]))
    prior = prior_lambda * prior_weight
    fold_rdms = []
    for fold in cv_folds:
        data_test = dataset.subset_obs(cv_descriptor, fold)
        data_train = dataset.subset_obs(cv_descriptor,
                                        np.setdiff1d(cv_folds, fold))
        measurements_train, _, _ = average_dataset_by(data_train, descriptor)
        measurements_test, _, _ = average_dataset_by(data_test, descriptor)
        measurements_train = (measurements_train + prior) / prior
        measurements_test = (measurements_test + prior) / prior
        diff = _calc_pairwise_differences(measurements_train)
        diff_log = _calc_pairwise_differences(np.log(measurements_test))
        # keep every fold's estimate instead of overwriting the previous one
        fold_rdms.append(np.einsum('ij,ij->i', diff, diff_log)
                         / measurements_train.shape[1])
    if not fold_rdms:
        raise ValueError('no crossvalidation folds found in the dataset')
    # combine the folds: average of the per-fold estimates
    rdm = np.mean(fold_rdms, axis=0)
    rdm = RDMs(dissimilarities=np.array([rdm]),
               dissimilarity_measure='poisson_cv',
               descriptors=dataset.descriptors)
    _, desc, _ = average_dataset_by(dataset, descriptor)
    rdm.pattern_descriptors[descriptor] = desc
    return rdm
def load_rdms(fpath, sort=True):
    """Read a Meadows results file and return any RDMs as a pyrsa object

    For a single-participant file the variables ``stimuli`` and ``rdmutv``
    are read. Otherwise every variable whose name starts with ``stimuli``
    identifies one participant, whose RDM is read from the matching
    ``rdmutv_<participant>`` variable; the stimulus names are taken from
    the first ``stimuli*`` variable found.

    Args:
        fpath (str): path to .mat Meadows results file
        sort (bool): whether to sort the RDM based on the stimulus names

    Raises:
        ValueError: Will raise an error if the file is missing an expected
            variable. This can happen if the file does not contain MA task
            data.

    Returns:
        RDMs: All rdms found in the data file as an RDMs object
    """
    info = extract_filename_segments(fpath)
    data = loadmat(fpath)
    if info['participant_scope'] == 'single':
        for var in ('stimuli', 'rdmutv'):
            if var not in data:
                raise ValueError(f'File missing variable: {var}')
        utvs = data['rdmutv']
        stimuli_fnames = data['stimuli']
        pnames = [info['participant']]
    else:
        stim_vars = [v for v in data.keys() if v.startswith('stimuli')]
        # fail with the documented ValueError rather than an IndexError
        if not stim_vars:
            raise ValueError('File missing variable: stimuli')
        stimuli_fnames = data[stim_vars[0]]
        # 'stimuli_a_b' -> participant name 'a-b'
        pnames = ['-'.join(v.split('_')[1:]) for v in stim_vars]
        utv_vars = ['rdmutv_' + p.replace('-', '_') for p in pnames]
        for var in utv_vars:
            if var not in data:
                raise ValueError(f'File missing variable: {var}')
        # NOTE(review): squeeze drops every length-1 axis, so a file with a
        # single participant yields fewer dimensions here - confirm that
        # RDMs accepts this.
        utvs = numpy.squeeze(numpy.stack([data[v] for v in utv_vars]))

    desc_info_keys = (
        'participant',
        'task_index',
        'task_name',
        'experiment_name',
    )
    # condition name = stimulus file name up to the first '.'
    conds = [f.split('.')[0] for f in stimuli_fnames]
    rdms = RDMs(
        utvs,
        dissimilarity_measure='euclidean',
        descriptors={k: info[k] for k in desc_info_keys if k in info},
        rdm_descriptors=dict(participants=pnames),
        pattern_descriptors=dict(conds=conds),
    )
    if sort:
        rdms.sort_by(conds='alpha')
    return rdms
def calc_rdm_correlation(dataset, descriptor=None):
    """
    calculates an RDM from an input dataset using correlation distance

    The dissimilarity of two patterns is one minus their correlation
    coefficient as computed by numpy.corrcoef over the rows of the
    measurement matrix.
    If multiple instances of the same condition are found in the dataset
    they are averaged.

    Args:
        dataset (pyrsa.data.DatasetBase):
            The dataset the RDM is computed from
        descriptor (String):
            obs_descriptor used to define the rows/columns of the RDM
            defaults to one row/column per row in the dataset

    Returns:
        RDMs object with the one RDM

    """
    measurements, desc, descriptor = _parse_input(dataset, descriptor)
    rdm = 1 - np.corrcoef(measurements)
    # label the result as what it is: a correlation distance
    rdm = RDMs(dissimilarities=np.array([rdm]),
               dissimilarity_measure='correlation',
               descriptors=dataset.descriptors)
    rdm.pattern_descriptors[descriptor] = desc
    return rdm
def calc_rdm_mahalanobis(dataset, descriptor=None, noise=None):
    """
    calculates an RDM from an input dataset using mahalanobis distance
    If multiple instances of the same condition are found in the dataset
    they are averaged.

    Args:
        dataset (pyrsa.data.dataset.DatasetBase):
            The dataset the RDM is computed from
        descriptor (String):
            obs_descriptor used to define the rows/columns of the RDM
            defaults to one row/column per row in the dataset
        noise (numpy.ndarray):
            dataset.n_channel x dataset.n_channel
            precision matrix used to calculate the RDM
            default: identity matrix, i.e. euclidean distance
            (in that case the result of calc_rdm_euclid is returned as is)

    Returns:
        pyrsa.rdm.rdms.RDMs: RDMs object with the one RDM

    """
    # without a precision matrix this is just the euclidean RDM
    if noise is None:
        return calc_rdm_euclid(dataset, descriptor)

    patterns, desc, descriptor = _parse_input(dataset, descriptor)
    noise = _check_noise(noise, dataset.n_channel)
    n_columns = patterns.shape[1]
    # difference @ precision @ difference for all pairs:
    # take the difference vectors, apply the precision to them,
    # then form the row-wise inner product of both
    pair_diff = _calc_pairwise_differences(patterns)
    weighted_diff = (noise @ pair_diff.T).T
    distance = np.einsum('ij,ij->i', pair_diff, weighted_diff) / n_columns
    result = RDMs(
        dissimilarities=np.array([distance]),
        dissimilarity_measure='Mahalanobis',
        descriptors=dataset.descriptors,
    )
    result.pattern_descriptors[descriptor] = desc
    result.descriptors['noise'] = noise
    return result
def calc_rdm_crossnobis(dataset, descriptor, noise=None, cv_descriptor=None):
    """
    calculates an RDM from an input dataset using Cross-nobis distance
    This performs leave one out crossvalidation over the cv_descriptor

    For every fold, the averages of that fold are combined with the
    averages of all remaining folds via _calc_rdm_crossnobis_single. The
    per-fold results are averaged, weighted by the number of observations
    in the remaining-folds subset.

    Args:
        dataset (pyrsa.data.DatasetBase):
            The dataset the RDM is computed from
        descriptor (String):
            obs_descriptor used to define the rows/columns of the RDM.
            Required: passing None raises a ValueError.
        noise (numpy.ndarray):
            dataset.n_channel x dataset.n_channel
            precision matrix used to calculate the RDM
        cv_descriptor (String):
            obs_descriptor which determines the cross-validation folds.
            It is looked up directly in dataset.obs_descriptors.

    Returns:
        RDMs object with the one RDM

    Raises:
        ValueError: if descriptor is None.

    """
    noise = _check_noise(noise, dataset.n_channel)
    if descriptor is None:
        raise ValueError('descriptor must be a string! Crossvalidation '
                         'requires multiple measurements to be grouped')
    cv_folds = np.unique(np.array(dataset.obs_descriptors[cv_descriptor]))
    weights = []
    rdms = []
    for i_fold in cv_folds:
        # one side is the single fold, the other side all remaining folds
        data_train = dataset.subset_obs(cv_descriptor, i_fold)
        data_test = dataset.subset_obs(cv_descriptor,
                                       np.setdiff1d(cv_folds, i_fold))
        measurements_train, desc = average_dataset_by(data_train, descriptor)
        measurements_test, desc = average_dataset_by(data_test, descriptor)
        rdm = _calc_rdm_crossnobis_single(measurements_train,
                                          measurements_test,
                                          noise)
        rdms.append(rdm)
        weights.append(data_test.n_obs)
    rdms = np.array(rdms)
    weights = np.array(weights)
    # weighted mean over folds
    rdm = np.einsum('ij,i->j', rdms, weights) / np.sum(weights)
    rdm = RDMs(dissimilarities=np.array([rdm]),
               dissimilarity_measure='crossnobis',
               descriptors=dataset.descriptors)
    # descriptor cannot be None here (checked above)
    rdm.pattern_descriptors[descriptor] = desc
    rdm.descriptors['noise'] = noise
    rdm.descriptors['cv_descriptor'] = cv_descriptor
    return rdm
def calc_rdm_crossnobis(dataset, descriptor, noise=None, cv_descriptor=None):
    """
    calculates an RDM from an input dataset using Cross-nobis distance
    This performs leave one out crossvalidation over the cv_descriptor.

    As the minimum input provide a dataset and a descriptor-name to
    define the rows & columns of the RDM.
    You may pass a noise precision. If you don't an identity is assumed.
    Also a cv_descriptor can be passed to define the crossvalidation folds.
    It is recommended to do this, to assure correct calculations. If you do
    not, this function infers a split in order of the dataset, which is
    guaranteed to fail if there are any unbalances.

    This function also accepts a list of noise precision matrices.
    It is then assumed that this is the precision of the mean from
    the corresponding crossvalidation fold, i.e. if multiple measurements
    enter a fold, please compute the resulting noise precision in advance!
    In that case every pair of folds is combined using the inverse of the
    sum of the two folds' inverted precisions.

    To assert equal ordering in the folds the dataset is initially
    sorted according to the descriptor used to define the patterns.
    Note that this calls ``dataset.sort_by`` on the passed dataset and,
    when no cv_descriptor is given, stores a generated one under
    ``dataset.obs_descriptors['cv_desc']``.

    The returned RDM is the sum of the per-fold (or per-fold-pair)
    estimates.

    Args:
        dataset (pyrsa.data.dataset.DatasetBase):
            The dataset the RDM is computed from
        descriptor (String):
            obs_descriptor used to define the rows/columns of the RDM.
            Required: passing None raises a ValueError.
        noise (numpy.ndarray):
            dataset.n_channel x dataset.n_channel
            precision matrix used to calculate the RDM
            default: identity matrix, i.e. euclidean distance
        cv_descriptor (String):
            obs_descriptor which determines the cross-validation folds

    Returns:
        pyrsa.rdm.rdms.RDMs: RDMs object with the one RDM

    Raises:
        ValueError: if descriptor is None.

    """
    noise = _check_noise(noise, dataset.n_channel)
    if descriptor is None:
        raise ValueError('descriptor must be a string! Crossvalidation '
                         'requires multiple measurements to be grouped')
    if cv_descriptor is None:
        cv_desc = _gen_default_cv_descriptor(dataset, descriptor)
        dataset.obs_descriptors['cv_desc'] = cv_desc
        cv_descriptor = 'cv_desc'
    dataset.sort_by(descriptor)
    cv_folds = np.unique(np.array(dataset.obs_descriptors[cv_descriptor]))
    rdms = []
    if noise is None or (isinstance(noise, np.ndarray) and noise.ndim == 2):
        for fold in cv_folds:
            data_test = dataset.subset_obs(cv_descriptor, fold)
            data_train = dataset.subset_obs(cv_descriptor,
                                            np.setdiff1d(cv_folds, fold))
            measurements_train, _, _ = \
                average_dataset_by(data_train, descriptor)
            measurements_test, _, _ = \
                average_dataset_by(data_test, descriptor)
            n_cond = measurements_train.shape[0]
            # all condition pairs (i, j) with i < j, in row-major order
            first, second = np.triu_indices(n_cond, k=1)
            diff_train = measurements_train[first] \
                - measurements_train[second]
            diff_test = measurements_test[first] \
                - measurements_test[second]
            if noise is not None:
                # apply the precision to every test difference vector
                diff_test = diff_test @ noise.T
            rdms.append(np.einsum('ij,ij->i', diff_train, diff_test))
    else:  # a list of noises was provided
        measurements = []
        variances = []
        for i_fold, fold in enumerate(cv_folds):
            data = dataset.subset_obs(cv_descriptor, fold)
            measurements.append(average_dataset_by(data, descriptor)[0])
            variances.append(np.linalg.inv(noise[i_fold]))
        n_folds = len(cv_folds)
        # every unordered pair of distinct folds is used exactly once
        for i_fold in range(n_folds):
            for j_fold in range(i_fold + 1, n_folds):
                rdm = _calc_rdm_crossnobis_single(
                    measurements[i_fold], measurements[j_fold],
                    np.linalg.inv(variances[i_fold]
                                  + variances[j_fold]))
                rdms.append(rdm)
    rdms = np.array(rdms)
    # sum over folds / fold pairs
    rdm = np.einsum('ij->j', rdms)
    rdm = RDMs(dissimilarities=np.array([rdm]),
               dissimilarity_measure='crossnobis',
               descriptors=dataset.descriptors)
    _, desc, _ = average_dataset_by(dataset, descriptor)
    rdm.pattern_descriptors[descriptor] = desc
    rdm.descriptors['noise'] = noise
    rdm.descriptors['cv_descriptor'] = cv_descriptor
    return rdm