Ejemplo n.º 1
0
def calc_rdm_correlation(dataset, descriptor=None):
    """
    computes an RDM of correlation distances between the patterns of a
    dataset.
    If multiple instances of the same condition are found in the dataset
    they are averaged.

    Each pattern (row of the measurement matrix returned by _parse_input)
    is mean-centered and scaled to unit length, so that one minus the
    inner product of two rows is their correlation distance.

    Args:
        dataset (rsatoolbox.data.DatasetBase):
            The dataset the RDM is computed from
        descriptor (String):
            obs_descriptor used to define the rows/columns of the RDM
            defaults to one row/column per row in the dataset

    Returns:
        rsatoolbox.rdm.rdms.RDMs: RDMs object with the one RDM

    """
    patterns, labels, descriptor = _parse_input(dataset, descriptor)
    # remove the mean of every pattern across its measurements
    centered = patterns - patterns.mean(axis=1, keepdims=True)
    # euclidean length of every centered pattern
    row_norms = np.sqrt(np.einsum('ij,ij->i', centered, centered))
    centered /= row_norms[:, None]
    # inner products of unit-length centered rows are correlations
    distances = 1 - np.einsum('ik,jk', centered, centered)
    result = RDMs(
        dissimilarities=np.array([distances]),
        dissimilarity_measure='correlation',
        rdm_descriptors=deepcopy(dataset.descriptors),
    )
    result.pattern_descriptors[descriptor] = labels
    return result
Ejemplo n.º 2
0
def calc_rdm_poisson(dataset, descriptor=None, prior_lambda=1,
                     prior_weight=0.1):
    """
    computes an RDM from an input dataset based on the symmetrized
    KL-divergence under a poisson distribution assumption.
    If multiple instances of the same condition are found in the dataset
    they are averaged.

    Before the divergence is evaluated the measurements are replaced by
    (measurements + prior_lambda * prior_weight) / (1 + prior_weight).
    The resulting divergences are divided by the number of columns of the
    measurement matrix.

    Args:
        dataset (rsatoolbox.data.DatasetBase):
            The dataset the RDM is computed from
        descriptor (String):
            obs_descriptor used to define the rows/columns of the RDM
            defaults to one row/column per row in the dataset
        prior_lambda (float):
            value the measurements are pulled towards, default 1
        prior_weight (float):
            weight of prior_lambda relative to the measurements,
            default 0.1

    Returns:
        rsatoolbox.rdm.rdms.RDMs: RDMs object with the one RDM

    """
    rates, labels, descriptor = _parse_input(dataset, descriptor)
    rates = (rates + prior_lambda * prior_weight) / (1 + prior_weight)
    # kernel[i, j] = sum_k rates[i, k] * log(rates[j, k])
    kernel = rates @ np.log(rates).T
    self_terms = np.diag(kernel)
    divergence = (np.expand_dims(self_terms, 0)
                  + np.expand_dims(self_terms, 1)
                  - kernel - kernel.T)
    divergence = _extract_triu_(divergence) / rates.shape[1]
    result = RDMs(
        dissimilarities=np.array([divergence]),
        dissimilarity_measure='poisson',
        rdm_descriptors=deepcopy(dataset.descriptors),
    )
    result.pattern_descriptors[descriptor] = labels
    return result
Ejemplo n.º 3
0
 def test_weighted_mean(self):
     """RDMs.mean() uses weights given as an array or by descriptor name

     The same weighted average (nan entries left out) is expected whether
     the weights are handed over directly or looked up in rdm_descriptors.
     """
     from rsatoolbox.rdm.rdms import RDMs
     dissimilarities = array([
         [  1,   2, nan,   3, nan, nan],
         [  2,   1, nan,   4,   5,   6],
     ])
     weights = array([
         [  1,   1, nan,   1, nan, nan],
         [  2,   2, nan,   2,   2,   2],
     ])
     expected = array([[1.6667, 1.3333, nan, 3.6667, 5.0000, 6.0000]])
     partial_rdms = RDMs(dissimilarities=dissimilarities)

     # weights handed over directly as an array
     averaged = partial_rdms.mean(weights=weights)
     assert_almost_equal(averaged.dissimilarities, expected, decimal=3)

     # weights referenced by the name of an rdm descriptor
     partial_rdms.rdm_descriptors['theWeights'] = weights
     averaged = partial_rdms.mean(weights='theWeights')
     assert_almost_equal(averaged.dissimilarities, expected, decimal=3)
Ejemplo n.º 4
0
def calc_rdm_poisson_cv(dataset, descriptor=None, prior_lambda=1,
                        prior_weight=0.1, cv_descriptor=None):
    """
    calculates an RDM from an input dataset using the crossvalidated
    symmetrized KL-divergence assuming a poisson distribution

    Every crossvalidation fold is used once as the test set, with all
    remaining folds as the training set. The estimates of the individual
    folds are averaged to yield the returned RDM.

    To assert equal ordering in the folds the dataset is initially sorted
    according to the descriptor used to define the patterns.

    Note that the passed dataset itself is used for this: sort_by is
    called on it, and if no cv_descriptor is given a 'cv_desc'
    obs_descriptor is added to it.

    Args:
        dataset (rsatoolbox.data.DatasetBase):
            The dataset the RDM is computed from
        descriptor (String):
            obs_descriptor used to define the rows/columns of the RDM.
            Must be given; None raises a ValueError
        prior_lambda (float):
            value the measurements are pulled towards, default 1
        prior_weight (float):
            weight of prior_lambda relative to the measurements; the
            measurements are replaced by
            (measurements + prior_lambda * prior_weight)
            / (1 + prior_weight)
        cv_descriptor (str): The descriptor that indicates the folds
            to use for crossvalidation. If None, a default is generated
            by _gen_default_cv_descriptor

    Returns:
        rsatoolbox.rdm.rdms.RDMs: RDMs object with the one RDM

    Raises:
        ValueError: if descriptor is None or if the dataset contains no
            crossvalidation folds

    """
    if descriptor is None:
        raise ValueError('descriptor must be a string! Crossvalidation '
                         'requires multiple measurements to be grouped')
    if cv_descriptor is None:
        cv_desc = _gen_default_cv_descriptor(dataset, descriptor)
        dataset.obs_descriptors['cv_desc'] = cv_desc
        cv_descriptor = 'cv_desc'

    dataset.sort_by(descriptor)
    cv_folds = np.unique(np.array(dataset.obs_descriptors[cv_descriptor]))
    fold_rdms = []
    for fold in cv_folds:
        data_test = dataset.subset_obs(cv_descriptor, fold)
        data_train = dataset.subset_obs(cv_descriptor,
                                        np.setdiff1d(cv_folds, fold))
        measurements_train, _, _ = average_dataset_by(data_train, descriptor)
        measurements_test, _, _ = average_dataset_by(data_test, descriptor)
        measurements_train = (measurements_train
                              + prior_lambda * prior_weight) \
            / (1 + prior_weight)
        measurements_test = (measurements_test
                             + prior_lambda * prior_weight) \
            / (1 + prior_weight)
        # kernel[i, j] = sum_k train[i, k] * log(test[j, k])
        kernel = measurements_train @ np.log(measurements_test).T
        diagonal = np.diag(kernel)
        rdm = np.expand_dims(diagonal, 0) + np.expand_dims(diagonal, 1) \
            - kernel - kernel.T
        # keep the estimate of every fold instead of only the last one
        fold_rdms.append(_extract_triu_(rdm) / measurements_train.shape[1])
    if not fold_rdms:
        raise ValueError('dataset contains no crossvalidation folds')
    rdm = np.mean(fold_rdms, axis=0)
    rdm = RDMs(dissimilarities=np.array([rdm]),
               dissimilarity_measure='poisson_cv',
               rdm_descriptors=deepcopy(dataset.descriptors))
    _, desc, _ = average_dataset_by(dataset, descriptor)
    rdm.pattern_descriptors[descriptor] = desc
    return rdm
Ejemplo n.º 5
0
 def test_mean_no_weights(self):
     """Without weights RDMs.mean() averages the rdms while leaving out nans

     A pair that is nan in every RDM stays nan in the result.
     """
     from rsatoolbox.rdm.rdms import RDMs
     dissimilarities = array([
         [  1,   2, nan,   3, nan, nan],
         [  2,   1, nan,   4,   5,   6],
     ])
     expected = array([[ 1.5,  1.5, nan, 3.5, 5, 6]])
     averaged = RDMs(dissimilarities=dissimilarities).mean()
     assert_almost_equal(averaged.dissimilarities, expected)
Ejemplo n.º 6
0
 def test_rescale(self):
     """rescale() brings partial RDMs as close together as possible

     Checked here: the pair shared by both RDMs agrees after rescaling,
     the rescaled values stay perfectly correlated with the input, the
     values match a stored reference, and the 'rescalingWeights'
     descriptor holds the squared input dissimilarities.
     """
     from rsatoolbox.rdm.rdms import RDMs
     from rsatoolbox.rdm.combine import rescale
     partial = array([
         [  1,   2, nan,   3, nan, nan],
         [nan, nan, nan,   4,   5,   6],
     ])
     rescaled_rdms = rescale(RDMs(dissimilarities=partial))
     rescaled = rescaled_rdms.dissimilarities

     # column 3 is the only pair present in both RDMs
     assert_almost_equal(rescaled[0, 3], rescaled[1, 3], decimal=4)

     correlation = pearsonr(non_nan(partial), non_nan(rescaled))[0]
     assert_almost_equal(correlation, 1, decimal=7)

     expected_rescaled = array([
         [0.1438, 0.2877, nan, 0.4315,    nan,    nan],
         [   nan,    nan, nan, 0.4316, 0.5395, 0.6474]
     ])
     assert_almost_equal(rescaled, expected_rescaled, decimal=4)

     expected_weights = array([
         [  1,   4, nan,    9,  nan,  nan],
         [nan, nan, nan,   16,   25,   36],
     ])
     assert_array_equal(
         rescaled_rdms.rdm_descriptors.get('rescalingWeights'),
         expected_weights
     )
Ejemplo n.º 7
0
    def test_from_partials_with_list_of_pattern_descriptors(self):
        """from_partials() with an explicit list of all patterns

        A list holding a single RDMs object with one RDM over the
        conditions b, c, d is passed together with all_patterns, which
        names the extra pattern 'a' that the RDM does not cover. The pairs
        involving 'a' are expected to be nan.
        """
        from rsatoolbox.rdm.rdms import RDMs
        from rsatoolbox.rdm.combine import from_partials
        all_patterns = ['a', 'b', 'c', 'd']
        partial = RDMs(
            dissimilarities=array([[1, 2, 3]]),
            dissimilarity_measure='measure',
            pattern_descriptors=dict(conds=['b', 'c', 'd']),
        )
        rdms = from_partials([partial], all_patterns=all_patterns)

        self.assertEqual(rdms.n_rdm, 1)
        self.assertEqual(rdms.n_cond, 4)
        assert_array_equal(
            rdms.pattern_descriptors.get('conds'),
            ['a', 'b', 'c', 'd']
        )
        expected = array([
            [nan, nan, nan,   1,   2,   3],
        ])
        assert_array_equal(rdms.dissimilarities, expected)
Ejemplo n.º 8
0
 def test_from_partials_based_on_list_of_rdms_objects(self):
     """from_partials() derives the full set of conditions from its inputs

     Two RDMs objects over overlapping conditions (a, b, c and b, c, d)
     are combined. Expected: three RDMs over four conditions, shared
     descriptors kept as descriptors, differing descriptors turned into
     rdm_descriptors, and pairs an RDM did not cover filled with nan.
     """
     from rsatoolbox.rdm.rdms import RDMs
     from rsatoolbox.rdm.combine import from_partials
     first = RDMs(
         dissimilarities=array([[1, 2, 3]]),
         dissimilarity_measure='shared_measure',
         descriptors=dict(shared_desc='shared_val', diff_desc='one'),
         rdm_descriptors=dict(rdesc=['foo1']),
         pattern_descriptors=dict(conds=['a', 'b', 'c']),
     )
     second_third = RDMs(
         dissimilarities=array([[4, 5, 6], [7, 8, 9]]),
         dissimilarity_measure='shared_measure',
         descriptors=dict(shared_desc='shared_val', diff_desc='two-three'),
         rdm_descriptors=dict(rdesc=['foo2', 'foo3']),
         pattern_descriptors=dict(conds=['b', 'c', 'd']),
     )
     rdms = from_partials([first, second_third])

     # sizes and metadata shared by all inputs
     self.assertEqual(rdms.n_rdm, 3)
     self.assertEqual(rdms.n_cond, 4)
     self.assertEqual(rdms.dissimilarity_measure, 'shared_measure')
     self.assertEqual(rdms.descriptors.get('shared_desc'), 'shared_val')

     # per-RDM metadata
     assert_array_equal(
         rdms.rdm_descriptors.get('diff_desc'),
         ['one', 'two-three', 'two-three']
     )
     assert_array_equal(
         rdms.rdm_descriptors.get('rdesc'),
         ['foo1', 'foo2', 'foo3']
     )

     # union of the conditions
     assert_array_equal(
         rdms.pattern_descriptors.get('conds'),
         ['a', 'b', 'c', 'd']
     )

     expected = array([
         [  1,   2, nan,   3, nan, nan],
         [nan, nan, nan,   4,   5,   6],
         [nan, nan, nan,   7,   8,   9]
     ])
     assert_array_equal(rdms.dissimilarities, expected)
Ejemplo n.º 9
0
def load_rdms(fpath: str, sort: bool = True) -> RDMs:
    """Read a Meadows results file and return any RDMs as an rsatoolbox object

    The file type is taken from the filename segments; 'mat' and 'json'
    files are supported. Condition names are the stimulus names up to
    their first dot.

    Args:
        fpath (str): path to a .mat or .json Meadows results file
        sort (bool): whether to sort the RDM based on the stimulus names

    Raises:
        ValueError: If the file type is neither 'mat' nor 'json'.
            According to the original documentation an error is also
            raised if the file is missing an expected variable, which can
            happen if the file does not contain MA task data.

    Returns:
        RDMs: All rdms found in the data file as an RDMs object
    """
    info = extract_filename_segments(fpath)
    filetype = info['filetype']
    if filetype == 'mat':
        components = load_rdms_comps_mat(fpath, info)
    elif filetype == 'json':
        components = load_rdms_comps_json(fpath, info)
    else:
        raise ValueError('Unsupported file type')
    utvs, stimuli, pnames, tnames, tidx = components

    # drop everything after the first dot of each stimulus name
    conds = [name.split('.', 1)[0] for name in stimuli]

    # task name and task index are optional
    rdm_descriptors = dict(participant=pnames)
    for key, values in (('task', tnames), ('task_index', tidx)):
        if values is not None:
            rdm_descriptors[key] = values

    rdms = RDMs(
        utvs,
        dissimilarity_measure='euclidean',
        descriptors=dict(experiment_name=info['experiment_name']),
        rdm_descriptors=rdm_descriptors,
        pattern_descriptors=dict(conds=conds),
    )
    if sort:
        rdms.sort_by(conds='alpha')
    return rdms
Ejemplo n.º 10
0
def calc_rdm_euclid(dataset, descriptor=None):
    """
    calculates an RDM from an input dataset using the squared euclidean
    distance, divided by the number of columns of the measurement matrix.

    Args:
        dataset (rsatoolbox.data.DatasetBase):
            The dataset the RDM is computed from
        descriptor (String):
            obs_descriptor used to define the rows/columns of the RDM
            defaults to one row/column per row in the dataset
    Returns:
        rsatoolbox.rdm.rdms.RDMs: RDMs object with the one RDM
    """

    patterns, labels, descriptor = _parse_input(dataset, descriptor)
    # |a - b|^2 = |a|^2 + |b|^2 - 2 a.b, evaluated for all pairs at once
    squared_norms = np.sum(patterns**2, axis=1, keepdims=True)
    gram = np.dot(patterns, patterns.T)
    distances = squared_norms + squared_norms.T - 2 * gram
    distances = _extract_triu_(distances) / patterns.shape[1]
    result = RDMs(
        dissimilarities=np.array([distances]),
        dissimilarity_measure='squared euclidean',
        rdm_descriptors=deepcopy(dataset.descriptors),
    )
    result.pattern_descriptors[descriptor] = labels
    return result
Ejemplo n.º 11
0
def calc_rdm_mahalanobis(dataset, descriptor=None, noise=None):
    """
    computes an RDM from an input dataset using the squared mahalanobis
    distance.
    If multiple instances of the same condition are found in the dataset
    they are averaged.

    Without a noise precision the computation is delegated to
    calc_rdm_euclid. Otherwise the distances are divided by the number of
    columns of the measurement matrix and the checked noise precision is
    stored in the 'noise' descriptor of the result.

    Args:
        dataset (rsatoolbox.data.dataset.DatasetBase):
            The dataset the RDM is computed from
        descriptor (String):
            obs_descriptor used to define the rows/columns of the RDM
            defaults to one row/column per row in the dataset
        noise (numpy.ndarray):
            dataset.n_channel x dataset.n_channel
            precision matrix used to calculate the RDM
            default: identity matrix, i.e. euclidean distance

    Returns:
        rsatoolbox.rdm.rdms.RDMs: RDMs object with the one RDM

    """
    if noise is None:
        return calc_rdm_euclid(dataset, descriptor)

    patterns, labels, descriptor = _parse_input(dataset, descriptor)
    noise = _check_noise(noise, dataset.n_channel)
    # kernel[i, j] = patterns[i] @ noise @ patterns[j]
    kernel = patterns @ noise @ patterns.T
    self_terms = np.diag(kernel)
    distances = (np.expand_dims(self_terms, 0)
                 + np.expand_dims(self_terms, 1)
                 - 2 * kernel)
    distances = _extract_triu_(distances) / patterns.shape[1]
    result = RDMs(
        dissimilarities=np.array([distances]),
        dissimilarity_measure='squared mahalanobis',
        rdm_descriptors=deepcopy(dataset.descriptors),
    )
    result.pattern_descriptors[descriptor] = labels
    result.descriptors['noise'] = noise
    return result
Ejemplo n.º 12
0
 def test_rescale_setsize(self):
     """rescale() with method='setsize' stores weights based on set size

     The expected 'rescalingWeights' are one over the number of non-nan
     pairs of each RDM: 1/3 for the RDM with three pairs and 1/2 for the
     one with two.
     """
     from rsatoolbox.rdm.rdms import RDMs
     from rsatoolbox.rdm.combine import rescale
     dissimilarities = array([
         [  1,   2, nan,   3, nan, nan],
         [nan, nan, nan,   4,   5, nan],
     ])
     expected_weights = array([
         [0.3333, 0.3333, nan, 0.3333,  nan,  nan],
         [   nan,    nan, nan,    0.5,   0.5, nan],
     ])
     partial_rdms = RDMs(dissimilarities=dissimilarities)
     rescaled_rdms = rescale(partial_rdms, method='setsize')
     assert_almost_equal(
         rescaled_rdms.rdm_descriptors.get('rescalingWeights'),
         expected_weights,
         decimal=4
     )
Ejemplo n.º 13
0
def calc_rdm_crossnobis(dataset, descriptor, noise=None,
                        cv_descriptor=None):
    """
    calculates an RDM from an input dataset using Cross-nobis distance
    This performs leave one out crossvalidation over the cv_descriptor.

    As the minimum input provide a dataset and a descriptor-name to
    define the rows & columns of the RDM.
    You may pass a noise precision. If you don't an identity is assumed.
    Also a cv_descriptor can be passed to define the crossvalidation folds.
    It is recommended to do this, to assure correct calculations. If you do
    not, this function infers a split in order of the dataset, which is
    guaranteed to fail if there are any unbalances.

    This function also accepts a list of noise precision matrices.
    It is then assumed that this is the precision of the mean from
    the corresponding crossvalidation fold, i.e. if multiple measurements
    enter a fold, please compute the resulting noise precision in advance!
    In that case every pair of folds is evaluated once, using the inverse
    of the average of the two folds' inverted precisions.

    To assert equal ordering in the folds the dataset is initially sorted
    according to the descriptor used to define the patterns.

    Note that the passed dataset itself is used for this: sort_by is
    called on it, and if no cv_descriptor is given a 'cv_desc'
    obs_descriptor is added to it.

    Args:
        dataset (rsatoolbox.data.dataset.DatasetBase):
            The dataset the RDM is computed from
        descriptor (String):
            obs_descriptor used to define the rows/columns of the RDM.
            Must be given; None raises a ValueError
        noise (numpy.ndarray):
            dataset.n_channel x dataset.n_channel
            precision matrix used to calculate the RDM
            default: identity matrix, i.e. euclidean distance
        cv_descriptor (String):
            obs_descriptor which determines the cross-validation folds

    Returns:
        rsatoolbox.rdm.rdms.RDMs: RDMs object with the one RDM

    Raises:
        ValueError: if descriptor is None

    """
    # validate the cheap argument first, before any work on the noise
    if descriptor is None:
        raise ValueError('descriptor must be a string! Crossvalidation '
                         'requires multiple measurements to be grouped')
    noise = _check_noise(noise, dataset.n_channel)
    if noise is None:
        noise = np.eye(dataset.n_channel)
    if cv_descriptor is None:
        cv_desc = _gen_default_cv_descriptor(dataset, descriptor)
        dataset.obs_descriptors['cv_desc'] = cv_desc
        cv_descriptor = 'cv_desc'
    dataset.sort_by(descriptor)
    cv_folds = np.unique(np.array(dataset.obs_descriptors[cv_descriptor]))
    rdms = []
    # noise can no longer be None here, so only its shape decides
    if isinstance(noise, np.ndarray) and noise.ndim == 2:
        # one shared precision: each fold is tested against all others
        for fold in cv_folds:
            data_test = dataset.subset_obs(cv_descriptor, fold)
            data_train = dataset.subset_obs(cv_descriptor,
                                            np.setdiff1d(cv_folds, fold))
            measurements_train, _, _ = \
                average_dataset_by(data_train, descriptor)
            measurements_test, _, _ = \
                average_dataset_by(data_test, descriptor)
            rdm = _calc_rdm_crossnobis_single(
                measurements_train, measurements_test, noise)
            rdms.append(rdm)
    else:  # a list of noises was provided
        measurements = []
        variances = []
        for i_fold, fold in enumerate(cv_folds):
            data = dataset.subset_obs(cv_descriptor, fold)
            measurements.append(average_dataset_by(data, descriptor)[0])
            variances.append(np.linalg.inv(noise[i_fold]))
        n_folds = len(cv_folds)
        # j_fold starts above i_fold, so each pair is visited exactly once
        for i_fold in range(n_folds):
            for j_fold in range(i_fold + 1, n_folds):
                rdm = _calc_rdm_crossnobis_single(
                    measurements[i_fold], measurements[j_fold],
                    np.linalg.inv(
                        (variances[i_fold] + variances[j_fold]) / 2)
                    )
                rdms.append(rdm)
    rdms = np.array(rdms)
    # average over all fold estimates
    rdm = np.einsum('ij->j', rdms) / rdms.shape[0]
    rdm = RDMs(dissimilarities=np.array([rdm]),
               dissimilarity_measure='crossnobis',
               rdm_descriptors=deepcopy(dataset.descriptors))
    _, desc, _ = average_dataset_by(dataset, descriptor)
    rdm.pattern_descriptors[descriptor] = desc
    rdm.descriptors['noise'] = noise
    rdm.descriptors['cv_descriptor'] = cv_descriptor
    return rdm
Ejemplo n.º 14
0
def calc_rdm_unbalanced(dataset,
                        method='euclidean',
                        descriptor=None,
                        noise=None,
                        cv_descriptor=None,
                        prior_lambda=1,
                        prior_weight=0.1,
                        weighting='number',
                        enforce_same=False):
    """
    calculate a RDM from an input dataset for unbalanced datasets.

    If an iterable of datasets is passed, one RDM is computed per dataset
    and the results are concatenated. For a single dataset the similarity
    of every condition with itself and with every other condition is
    obtained from calc_one_similarity and combined into dissimilarities
    as s_ii + s_jj - 2 * s_ij.

    Args:
        dataset (rsatoolbox.data.dataset.DatasetBase):
            The dataset the RDM is computed from, or an iterable of
            datasets
        method (String):
            a description of the dissimilarity measure (e.g. 'Euclidean')
        descriptor (String):
            obs_descriptor used to define the rows/columns of the RDM.
            If None, a copy of the dataset is made and an 'index'
            obs_descriptor numbering its observations is used
        noise (numpy.ndarray):
            dataset.n_channel x dataset.n_channel
            precision matrix used to calculate the RDM
            used only for Mahalanobis and Crossnobis estimators
            defaults to an identity matrix, i.e. euclidean distance.
            With an iterable of datasets, a 2D array is used for all
            datasets and any other iterable is indexed per dataset
        cv_descriptor (String):
            obs_descriptor which determines the cross-validation folds.
            If None for 'crossnobis' or 'poisson_cv', an 'index'
            obs_descriptor is used (added to the dataset if missing) and
            a warning is issued
        prior_lambda (float):
            forwarded to calc_one_similarity
        prior_weight (float):
            forwarded to calc_one_similarity
        weighting (String):
            forwarded to calc_one_similarity
        enforce_same (bool):
            only forwarded to the per-dataset calls; not used otherwise
            in this function

    Returns:
        rsatoolbox.rdm.rdms.RDMs: RDMs object with the one RDM

    Raises:
        ValueError: if an iterable of datasets is combined with a noise
            argument that is neither None, a 2D array nor an iterable

    """
    # Handle collections first: the descriptor default below needs the
    # attributes of a single dataset and is applied per dataset instead.
    if isinstance(dataset, Iterable):
        shared_noise = noise is None or (
            isinstance(noise, np.ndarray) and noise.ndim == 2)
        rdms = []
        for i_dat, dat in enumerate(dataset):
            if shared_noise:
                dat_noise = noise
            elif isinstance(noise, Iterable):
                dat_noise = noise[i_dat]
            else:
                raise ValueError(
                    'noise must be None, a 2D array or an iterable with '
                    'one entry per dataset')
            rdms.append(
                calc_rdm_unbalanced(dat,
                                    method=method,
                                    descriptor=descriptor,
                                    noise=dat_noise,
                                    cv_descriptor=cv_descriptor,
                                    prior_lambda=prior_lambda,
                                    prior_weight=prior_weight,
                                    weighting=weighting,
                                    enforce_same=enforce_same))
        return concat(rdms)

    if descriptor is None:
        # work on a copy so the caller's dataset gets no extra descriptor
        dataset = deepcopy(dataset)
        dataset.obs_descriptors['index'] = np.arange(dataset.n_obs)
        descriptor = 'index'
    if method in ('crossnobis', 'poisson_cv') and cv_descriptor is None:
        if 'index' not in dataset.obs_descriptors:
            dataset.obs_descriptors['index'] = np.arange(dataset.n_obs)
        cv_descriptor = 'index'
        warnings.warn('cv_descriptor not set, using index for now. '
                      'This will only remove self-similarities. '
                      'Effectively this assumes independent trials')

    # the order of the conditions is the iteration order of the set
    unique_cond = list(set(dataset.obs_descriptors[descriptor]))
    similarity_args = dict(method=method,
                           noise=noise,
                           weighting=weighting,
                           prior_lambda=prior_lambda,
                           prior_weight=prior_weight,
                           cv_descriptor=cv_descriptor)
    self_sim = []
    cross_sim = []
    weights = []
    for i, i_des in enumerate(unique_cond):
        v, _ = calc_one_similarity(dataset, descriptor, i_des, i_des,
                                   **similarity_args)
        self_sim.append(v)
        # only pairs with j > i, i.e. the upper triangle
        for j_des in unique_cond[i + 1:]:
            v, w = calc_one_similarity(dataset, descriptor, i_des, j_des,
                                       **similarity_args)
            cross_sim.append(v)
            weights.append(w)
    row_idx, col_idx = row_col_indicator_rdm(len(unique_cond))
    self_sim = np.array(self_sim)
    cross_sim = np.array(cross_sim)
    # d_ij = s_ii + s_jj - 2 * s_ij
    rdm = row_idx @ self_sim + col_idx @ self_sim - 2 * cross_sim
    rdm = RDMs(dissimilarities=np.array([rdm]),
               dissimilarity_measure=method,
               rdm_descriptors=deepcopy(dataset.descriptors))
    rdm.pattern_descriptors[descriptor] = unique_cond
    rdm.rdm_descriptors['weights'] = [weights]
    return rdm