Beispiel #1
0
def calc_rdm_correlation(dataset, descriptor=None):
    """
    computes an RDM of correlation distances (1 - Pearson correlation)
    between the patterns of a dataset.
    Multiple instances of the same condition in the dataset are averaged
    before the distances are computed.

    Args:
        dataset (pyrsa.data.DatasetBase):
            The dataset the RDM is computed from
        descriptor (String):
            obs_descriptor used to define the rows/columns of the RDM
            defaults to one row/column per row in the dataset

    Returns:
        pyrsa.rdm.rdms.RDMs: RDMs object with the one RDM

    """
    patterns, desc, descriptor = _parse_input(dataset, descriptor)
    # centre every pattern across channels ...
    centered = patterns - patterns.mean(axis=1, keepdims=True)
    # ... and scale it to unit length, so that the inner product of two
    # rows is their Pearson correlation
    row_norms = np.sqrt(np.einsum('ij,ij->i', centered, centered))
    centered /= row_norms[:, None]
    similarity = np.einsum('ik,jk', centered, centered)
    # the full square distance matrix is handed to RDMs as a stack of one
    result = RDMs(
        dissimilarities=np.array([1 - similarity]),
        dissimilarity_measure='correlation',
        descriptors=dataset.descriptors,
    )
    result.pattern_descriptors[descriptor] = desc
    return result
Beispiel #2
0
def calc_rdm_poisson(dataset,
                     descriptor=None,
                     prior_lambda=1,
                     prior_weight=0.1):
    """
    calculates an RDM from an input dataset using the symmetrized
    KL-divergence assuming a poisson distribution.
    If multiple instances of the same condition are found in the dataset
    they are averaged.

    Before the divergence is computed, the measurements are shrunk
    towards a prior rate, i.e. they are replaced by the weighted mean
    ``(measurements + prior_lambda * prior_weight) / (1 + prior_weight)``.
    For a positive prior this keeps the rates strictly positive, so the
    logarithm is defined even for measurements of 0.

    Args:
        dataset (pyrsa.data.DatasetBase):
            The dataset the RDM is computed from
        descriptor (String):
            obs_descriptor used to define the rows/columns of the RDM
            defaults to one row/column per row in the dataset
        prior_lambda (float):
            rate of the prior the measurements are shrunk towards
        prior_weight (float):
            weight of the prior relative to the measurement (weight 1);
            0 leaves the measurements unchanged

    Returns:
        pyrsa.rdm.rdms.RDMs: RDMs object with the one RDM

    """
    measurements, desc, descriptor = _parse_input(dataset, descriptor)
    # weighted mean of measurement (weight 1) and prior (weight
    # prior_weight); normalising by (1 + prior_weight) stays finite for
    # prior_weight == 0 or prior_lambda == 0
    measurements = (measurements + prior_lambda * prior_weight) \
        / (1 + prior_weight)
    diff = _calc_pairwise_differences(measurements)
    diff_log = _calc_pairwise_differences(np.log(measurements))
    # symmetrized KL per pair: sum over channels of
    # (a - b) * (log(a) - log(b)), normalised by the number of channels
    rdm = np.einsum('ij,ij->i', diff, diff_log) / measurements.shape[1]
    rdm = RDMs(dissimilarities=np.array([rdm]),
               dissimilarity_measure='poisson',
               descriptors=dataset.descriptors)
    rdm.pattern_descriptors[descriptor] = desc
    return rdm
Beispiel #3
0
def calc_rdm_euclid(dataset, descriptor=None):
    """
    computes an RDM of squared euclidean distances between the patterns
    of a dataset, normalised by the number of channels.
    Multiple instances of the same condition in the dataset are averaged
    before the distances are computed.

    Args:
        dataset (pyrsa.data.DatasetBase):
            The dataset the RDM is computed from
        descriptor (String):
            obs_descriptor used to define the rows/columns of the RDM
            defaults to one row/column per row in the dataset

    Returns:
        pyrsa.rdm.rdms.RDMs: RDMs object with the one RDM

    """
    patterns, desc, descriptor = _parse_input(dataset, descriptor)
    n_channel = patterns.shape[1]
    pair_diffs = _calc_pairwise_differences(patterns)
    # row-wise sum of squares of the difference vectors
    squared_dist = np.einsum('ij,ij->i', pair_diffs, pair_diffs)
    result = RDMs(
        dissimilarities=np.array([squared_dist / n_channel]),
        dissimilarity_measure='euclidean',
        descriptors=dataset.descriptors,
    )
    result.pattern_descriptors[descriptor] = desc
    return result
Beispiel #4
0
def calc_rdm_mahalanobis(dataset, descriptor=None, noise=None):
    """
    computes an RDM of mahalanobis distances between the patterns of a
    dataset, normalised by the number of channels.
    Multiple instances of the same condition in the dataset are averaged
    before the distances are computed.

        Args:
            dataset (pyrsa.data.DatasetBase):
                The dataset the RDM is computed from
            descriptor (String):
                obs_descriptor used to define the rows/columns of the RDM
                defaults to one row/column per row in the dataset
            noise (numpy.ndarray):
                dataset.n_channel x dataset.n_channel
                precision matrix used to calculate the RDM
        Returns:
            RDMs object with the one RDM
    """
    patterns, desc, descriptor = _parse_input(dataset, descriptor)
    precision = _check_noise(noise, dataset.n_channel)
    n_cond, n_channel = patterns.shape[0], patterns.shape[1]
    # contrast matrix: multiplying it with the patterns yields one
    # difference vector per pair of conditions
    contrasts = allpairs(np.arange(n_cond))
    pair_diffs = np.matmul(contrasts, patterns)
    # precision @ difference, stored row-wise like pair_diffs
    weighted_diffs = np.matmul(precision, pair_diffs.T).T
    # row-wise inner product: difference @ precision @ difference
    distances = np.einsum('ij,ij->i', pair_diffs, weighted_diffs)
    result = RDMs(
        dissimilarities=np.array([distances / n_channel]),
        dissimilarity_measure='Mahalanobis',
        descriptors=dataset.descriptors,
    )
    result.pattern_descriptors[descriptor] = desc
    result.descriptors['noise'] = precision
    return result
Beispiel #5
0
def calc_rdm_poisson_cv(dataset,
                        descriptor=None,
                        prior_lambda=1,
                        prior_weight=0.1,
                        cv_descriptor=None):
    """
    calculates an RDM from an input dataset using the crossvalidated
    symmetrized KL-divergence assuming a poisson distribution

    Each fold is used once as test set while all remaining folds form the
    training set. The per-fold estimates are averaged into the final RDM.

    To assert equal ordering in the folds the dataset is initially sorted
    according to the descriptor used to define the patterns.
    Note that this sorts the passed dataset in place; if no cv_descriptor
    is given, a generated one is additionally stored in
    ``dataset.obs_descriptors['cv_desc']``.

    Args:
        dataset (pyrsa.data.DatasetBase):
            The dataset the RDM is computed from
        descriptor (String):
            obs_descriptor used to define the rows/columns of the RDM
            required, as crossvalidation needs repeated measurements
        prior_lambda (float):
            rate of the prior the measurements are shrunk towards
        prior_weight (float):
            weight of the prior relative to the measurement (weight 1);
            0 leaves the measurements unchanged
        cv_descriptor (str): The descriptor that indicates the folds
            to use for crossvalidation

    Returns:
        pyrsa.rdm.rdms.RDMs: RDMs object with the one RDM

    Raises:
        ValueError: if descriptor is None or no crossvalidation fold is
            found in the dataset

    """
    if descriptor is None:
        raise ValueError('descriptor must be a string! Crossvalidation '
                         'requires multiple measurements to be grouped')
    if cv_descriptor is None:
        cv_desc = _gen_default_cv_descriptor(dataset, descriptor)
        dataset.obs_descriptors['cv_desc'] = cv_desc
        cv_descriptor = 'cv_desc'

    dataset.sort_by(descriptor)
    cv_folds = np.unique(np.array(dataset.obs_descriptors[cv_descriptor]))
    # weighted mean of measurement (weight 1) and prior (weight
    # prior_weight); normalising by (1 + prior_weight) stays finite for
    # prior_weight == 0 or prior_lambda == 0
    prior_offset = prior_lambda * prior_weight
    prior_norm = 1 + prior_weight
    fold_rdms = []
    for fold in cv_folds:
        data_test = dataset.subset_obs(cv_descriptor, fold)
        data_train = dataset.subset_obs(cv_descriptor,
                                        np.setdiff1d(cv_folds, fold))
        measurements_train, _, _ = average_dataset_by(data_train, descriptor)
        measurements_test, _, _ = average_dataset_by(data_test, descriptor)
        measurements_train = (measurements_train + prior_offset) / prior_norm
        measurements_test = (measurements_test + prior_offset) / prior_norm
        # rate differences come from the training data, log-rate
        # differences from the test data
        diff = _calc_pairwise_differences(measurements_train)
        diff_log = _calc_pairwise_differences(np.log(measurements_test))
        fold_rdms.append(np.einsum('ij,ij->i', diff, diff_log)
                         / measurements_train.shape[1])
    if not fold_rdms:
        raise ValueError('no crossvalidation folds found in the dataset')
    # combine all folds instead of keeping only the last one
    rdm = np.mean(fold_rdms, axis=0)
    rdm = RDMs(dissimilarities=np.array([rdm]),
               dissimilarity_measure='poisson_cv',
               descriptors=dataset.descriptors)
    _, desc, _ = average_dataset_by(dataset, descriptor)
    rdm.pattern_descriptors[descriptor] = desc
    return rdm
Beispiel #6
0
def load_rdms(fpath, sort=True):
    """Read a Meadows results file and return any RDMs as a pyrsa object

    For a single-participant file the variables ``stimuli`` and ``rdmutv``
    are read. Otherwise every variable whose name starts with ``stimuli``
    defines one participant, whose RDM is read from the matching
    ``rdmutv_<participant>`` variable; the stimulus names are taken from
    the first ``stimuli`` variable found.

    Args:
        fpath (str): path to .mat Meadows results file
        sort (bool): whether to sort the RDM based on the stimulus names

    Raises:
        ValueError: Will raise an error if the file is missing an expected
            variable. This can happen if the file does not contain MA task
            data.

    Returns:
        RDMs: All rdms found in the data file as an RDMs object
    """
    info = extract_filename_segments(fpath)
    data = loadmat(fpath)
    if info['participant_scope'] == 'single':
        for var in ('stimuli', 'rdmutv'):
            if var not in data:
                raise ValueError(f'File missing variable: {var}')
        utvs = data['rdmutv']
        stimuli_fnames = data['stimuli']
        pnames = [info['participant']]
    else:
        stim_vars = [v for v in data if v.startswith('stimuli')]
        if not stim_vars:
            raise ValueError('File missing variable: stimuli')
        stimuli_fnames = data[stim_vars[0]]
        # 'stimuli_a_b' -> participant 'a-b' -> variable 'rdmutv_a_b'
        pnames = ['-'.join(v.split('_')[1:]) for v in stim_vars]
        utv_vars = ['rdmutv_' + p.replace('-', '_') for p in pnames]
        for var in utv_vars:
            if var not in data:
                raise ValueError(f'File missing variable: {var}')
        utvs = numpy.squeeze(numpy.stack([data[v] for v in utv_vars]))

    desc_info_keys = ('participant', 'task_index', 'task_name',
                      'experiment_name')
    # condition name = stimulus file name up to the first '.'
    conds = [f.split('.')[0] for f in stimuli_fnames]
    rdms = RDMs(
        utvs,
        dissimilarity_measure='euclidean',
        descriptors={k: info[k]
                     for k in desc_info_keys if k in info},
        rdm_descriptors=dict(participants=pnames),
        pattern_descriptors=dict(conds=conds),
    )
    if sort:
        rdms.sort_by(conds='alpha')
    return rdms
Beispiel #7
0
def calc_rdm_correlation(dataset, descriptor=None):
    """
    calculates an RDM from an input dataset using correlation distance,
    i.e. 1 - the Pearson correlation between patterns
    If multiple instances of the same condition are found in the dataset
    they are averaged.

        Args:
            dataset (pyrsa.data.DatasetBase):
                The dataset the RDM is computed from
            descriptor (String):
                obs_descriptor used to define the rows/columns of the RDM
                defaults to one row/column per row in the dataset
        Returns:
            RDMs object with the one RDM
    """
    measurements, desc, descriptor = _parse_input(dataset, descriptor)
    # np.corrcoef treats each row (pattern) as one variable
    rdm = 1 - np.corrcoef(measurements)
    # label the result with the measure that was actually computed
    rdm = RDMs(dissimilarities=np.array([rdm]),
               dissimilarity_measure='correlation',
               descriptors=dataset.descriptors)
    rdm.pattern_descriptors[descriptor] = desc
    return rdm
Beispiel #8
0
def calc_rdm_mahalanobis(dataset, descriptor=None, noise=None):
    """
    computes an RDM of mahalanobis distances between the patterns of a
    dataset, normalised by the number of channels.
    Multiple instances of the same condition in the dataset are averaged
    before the distances are computed.

    Args:
        dataset (pyrsa.data.dataset.DatasetBase):
            The dataset the RDM is computed from
        descriptor (String):
            obs_descriptor used to define the rows/columns of the RDM
            defaults to one row/column per row in the dataset
        noise (numpy.ndarray):
            dataset.n_channel x dataset.n_channel
            precision matrix used to calculate the RDM
            default: identity matrix, i.e. euclidean distance

    Returns:
        pyrsa.rdm.rdms.RDMs: RDMs object with the one RDM

    """
    # without a precision matrix this is delegated to calc_rdm_euclid
    if noise is None:
        return calc_rdm_euclid(dataset, descriptor)

    patterns, desc, descriptor = _parse_input(dataset, descriptor)
    precision = _check_noise(noise, dataset.n_channel)
    n_channel = patterns.shape[1]
    # difference @ precision @ difference for every pair of patterns:
    # build the difference vectors, multiply them with the precision and
    # take the row-wise inner product of the two
    pair_diffs = _calc_pairwise_differences(patterns)
    weighted_diffs = (precision @ pair_diffs.T).T
    distances = np.einsum('ij,ij->i', pair_diffs, weighted_diffs)
    result = RDMs(
        dissimilarities=np.array([distances / n_channel]),
        dissimilarity_measure='Mahalanobis',
        descriptors=dataset.descriptors,
    )
    result.pattern_descriptors[descriptor] = desc
    result.descriptors['noise'] = precision
    return result
Beispiel #9
0
def calc_rdm_crossnobis(dataset, descriptor, noise=None, cv_descriptor=None):
    """
    calculates an RDM from an input dataset using Cross-nobis distance
    This performs leave one out crossvalidation over the cv_descriptor

    One RDM is computed per fold from the fold itself and the remaining
    folds. The final RDM is the average of these, weighted by the number
    of observations in the remaining folds.

        Args:
            dataset (pyrsa.data.DatasetBase):
                The dataset the RDM is computed from
            descriptor (String):
                obs_descriptor used to define the rows/columns of the RDM
                required, as crossvalidation needs repeated measurements
            noise (numpy.ndarray):
                dataset.n_channel x dataset.n_channel
                precision matrix used to calculate the RDM
            cv_descriptor (String):
                obs_descriptor which determines the cross-validation folds
                required, although it defaults to None in the signature

        Returns:
            RDMs object with the one RDM

        Raises:
            ValueError: if descriptor or cv_descriptor is None
    """
    # validate the arguments before any work is done
    if descriptor is None:
        raise ValueError('descriptor must be a string! Crossvalidation '
                         'requires multiple measurements to be grouped')
    if cv_descriptor is None:
        raise ValueError('cv_descriptor must be a string! Crossvalidation '
                         'requires an obs_descriptor defining the folds')
    noise = _check_noise(noise, dataset.n_channel)
    cv_folds = np.unique(np.array(dataset.obs_descriptors[cv_descriptor]))
    weights = []
    rdms = []
    for fold in cv_folds:
        # here "train" is the single fold and "test" all remaining folds
        data_train = dataset.subset_obs(cv_descriptor, fold)
        data_test = dataset.subset_obs(cv_descriptor,
                                       np.setdiff1d(cv_folds, fold))
        measurements_train, desc = average_dataset_by(data_train, descriptor)
        measurements_test, desc = average_dataset_by(data_test, descriptor)
        rdm = _calc_rdm_crossnobis_single(measurements_train,
                                          measurements_test, noise)
        rdms.append(rdm)
        weights.append(data_test.n_obs)
    rdms = np.array(rdms)
    weights = np.array(weights)
    # weighted average of the per-fold RDMs
    rdm = np.einsum('ij,i->j', rdms, weights) / np.sum(weights)
    rdm = RDMs(dissimilarities=np.array([rdm]),
               dissimilarity_measure='crossnobis',
               descriptors=dataset.descriptors)
    # desc stems from the last fold
    # NOTE(review): assumes every fold yields the conditions in the same
    # order - confirm against average_dataset_by
    rdm.pattern_descriptors[descriptor] = desc
    rdm.descriptors['noise'] = noise
    rdm.descriptors['cv_descriptor'] = cv_descriptor
    return rdm
Beispiel #10
0
def calc_rdm_crossnobis(dataset, descriptor, noise=None, cv_descriptor=None):
    """
    calculates an RDM from an input dataset using Cross-nobis distance
    This performs leave one out crossvalidation over the cv_descriptor.

    As the minimum input provide a dataset and a descriptor-name to
    define the rows & columns of the RDM.
    You may pass a noise precision. If you don't an identity is assumed.
    Also a cv_descriptor can be passed to define the crossvalidation folds.
    It is recommended to do this, to assure correct calculations. If you do
    not, this function infers a split in order of the dataset, which is
    guaranteed to fail if there are any imbalances.

    This function also accepts a list of noise precision matrices.
    It is then assumed that this is the precision of the mean from
    the corresponding crossvalidation fold, i.e. if multiple measurements
    enter a fold, please compute the resulting noise precision in advance!
    In this case one RDM is computed for every pair of folds.

    To assert equal ordering in the folds the dataset is initially sorted
    according to the descriptor used to define the patterns.
    Note that this sorts the passed dataset in place; if no cv_descriptor
    is given, the inferred one is additionally stored in
    ``dataset.obs_descriptors['cv_desc']``.

    Args:
        dataset (pyrsa.data.dataset.DatasetBase):
            The dataset the RDM is computed from
        descriptor (String):
            obs_descriptor used to define the rows/columns of the RDM
            required, as crossvalidation needs repeated measurements
        noise (numpy.ndarray):
            dataset.n_channel x dataset.n_channel
            precision matrix used to calculate the RDM
            default: identity matrix, i.e. euclidean distance
        cv_descriptor (String):
            obs_descriptor which determines the cross-validation folds

    Returns:
        pyrsa.rdm.rdms.RDMs: RDMs object with the one RDM

    Raises:
        ValueError: if descriptor is None

    """
    # validate the arguments before any work is done
    if descriptor is None:
        raise ValueError('descriptor must be a string! Crossvalidation '
                         'requires multiple measurements to be grouped')
    noise = _check_noise(noise, dataset.n_channel)
    if cv_descriptor is None:
        cv_desc = _gen_default_cv_descriptor(dataset, descriptor)
        dataset.obs_descriptors['cv_desc'] = cv_desc
        cv_descriptor = 'cv_desc'
    dataset.sort_by(descriptor)
    cv_folds = np.unique(np.array(dataset.obs_descriptors[cv_descriptor]))
    n_folds = len(cv_folds)
    rdms = []
    if noise is None or (isinstance(noise, np.ndarray) and noise.ndim == 2):
        # no or a single precision matrix: each fold is the test set
        # once, all remaining folds form the training set
        for fold in cv_folds:
            data_test = dataset.subset_obs(cv_descriptor, fold)
            data_train = dataset.subset_obs(cv_descriptor,
                                            np.setdiff1d(cv_folds, fold))
            measurements_train, _, _ = \
                average_dataset_by(data_train, descriptor)
            measurements_test, _, _ = \
                average_dataset_by(data_test, descriptor)
            n_cond = measurements_train.shape[0]
            # one entry per unordered pair of conditions
            rdm = np.empty(n_cond * (n_cond - 1) // 2)
            k = 0
            for i_cond in range(n_cond - 1):
                for j_cond in range(i_cond + 1, n_cond):
                    diff_train = measurements_train[i_cond] \
                        - measurements_train[j_cond]
                    diff_test = measurements_test[i_cond] \
                        - measurements_test[j_cond]
                    if noise is None:
                        rdm[k] = np.sum(diff_train * diff_test)
                    else:
                        rdm[k] = np.sum(diff_train *
                                        np.matmul(noise, diff_test))
                    k += 1
            rdms.append(rdm)
    else:  # a list of noises was provided
        measurements = []
        variances = []
        for i_fold, fold in enumerate(cv_folds):
            data = dataset.subset_obs(cv_descriptor, fold)
            measurements.append(average_dataset_by(data, descriptor)[0])
            # invert the precision of fold i_fold to a covariance
            variances.append(np.linalg.inv(noise[i_fold]))
        for i_fold in range(n_folds):
            for j_fold in range(i_fold + 1, n_folds):
                # precision used for a pair of folds: inverse of the
                # summed covariances of the two folds
                rdm = _calc_rdm_crossnobis_single(
                    measurements[i_fold], measurements[j_fold],
                    np.linalg.inv(variances[i_fold] + variances[j_fold]))
                rdms.append(rdm)
    rdms = np.array(rdms)
    # NOTE(review): the per-fold RDMs are summed, not averaged, so the
    # scale of the result grows with the number of folds - confirm that
    # this is intended
    rdm = np.einsum('ij->j', rdms)
    rdm = RDMs(dissimilarities=np.array([rdm]),
               dissimilarity_measure='crossnobis',
               descriptors=dataset.descriptors)
    _, desc, _ = average_dataset_by(dataset, descriptor)
    rdm.pattern_descriptors[descriptor] = desc
    rdm.descriptors['noise'] = noise
    rdm.descriptors['cv_descriptor'] = cv_descriptor
    return rdm