コード例 #1
0
def boot_noise_ceiling(rdms, method='cosine', rdm_descriptor='index'):
    """Estimate noise-ceiling bounds via leave-one-out over the RDMs.

    For every leave-one-out split, the held-out RDMs are compared against
    (a) the pooled RDM of the remaining data, giving the lower bound, and
    (b) the pooled RDM of the complete data, giving the upper bound.
    Both bounds are averaged over all splits.

    Args:
        rdms(rsatoolbox.rdm.RDMs): data to calculate noise ceiling
        method(string): comparison method to use
        rdm_descriptor(string): descriptor to group rdms

    Returns:
        tuple: (lower nc-bound, upper nc-bound)

    """
    _, test_set, ceil_set = sets_leave_one_out_rdm(rdms, rdm_descriptor)
    # The pooled RDM of the full data set is identical for every split.
    pooled_all = pool_rdm(rdms, method=method)
    lower_bounds = []
    upper_bounds = []
    for i_split, ceil_entry in enumerate(ceil_set):
        held_out = test_set[i_split][0]
        pooled_rest = pool_rdm(ceil_entry[0], method=method)
        lower_bounds.append(np.mean(compare(pooled_rest, held_out, method)))
        upper_bounds.append(np.mean(compare(pooled_all, held_out, method)))
    lower = np.mean(np.array(lower_bounds))
    upper = np.mean(np.array(upper_bounds))
    return lower, upper
コード例 #2
0
ファイル: fitter.py プロジェクト: caiw/pyrsa
def _loss(theta,
          model,
          data,
          method='cosine',
          sigma_k=None,
          pattern_descriptor=None,
          pattern_idx=None,
          ridge_weight=0):
    """Loss of a model at a given parameter value.

    The loss is the negative mean comparison value between the model
    prediction and the data, plus a ridge penalty on the parameters.

    Args:
        theta(numpy.ndarray): evaluated parameter value
        model(Model): the model to be fit
        data(rsatoolbox.rdm.RDMs): data to be fit
        method(String, optional): evaluation metric. The default is 'cosine'.
        sigma_k(matrix): pattern-covariance matrix
            used only for whitened distances (ending in _cov)
            to compute the covariance matrix for rdms
        pattern_descriptor(String, optional): descriptor used for fitting.
            The default is None.
        pattern_idx(numpy.ndarray, optional): sampled patterns.
            The default is None.
        ridge_weight(float): weight for a ridge regularisation

    Returns:
        numpy.ndarray: loss

    """
    prediction = model.predict_rdm(theta)
    # The prediction is only restricted when both the descriptor and the
    # indices are supplied.
    if pattern_idx is not None and pattern_descriptor is not None:
        prediction = prediction.subsample_pattern(pattern_descriptor,
                                                  pattern_idx)
    fit_quality = np.mean(
        compare(prediction, data, method=method, sigma_k=sigma_k))
    ridge_penalty = np.sum(theta * theta) * ridge_weight
    return -fit_quality + ridge_penalty
コード例 #3
0
ファイル: fitter.py プロジェクト: caiw/pyrsa
def fit_select(model,
               data,
               method='cosine',
               pattern_idx=None,
               pattern_descriptor=None,
               sigma_k=None):
    """Fit a selection model by picking its best-performing RDM.

    Every RDM of the model is evaluated against the data and the index
    of the one with the highest mean comparison value is returned.
    Works only for ModelSelect.

    Args:
        model(rsatoolbox.model.Model): model to be fit
        data(rsatoolbox.rdm.RDMs): Data to fit to
        method(String): Evaluation method
        pattern_idx(numpy.ndarray): Which patterns are sampled
        pattern_descriptor(String): Which descriptor is used
        sigma_k(matrix): pattern-covariance matrix
            used only for whitened distances (ending in _cov)
            to compute the covariance matrix for rdms

    Returns:
        theta(int): index of the selected RDM

    """
    # Subsampling happens only if both pieces of information are present.
    restrict = pattern_idx is not None and pattern_descriptor is not None

    def _evaluate(i_rdm):
        candidate = model.predict_rdm(i_rdm)
        if restrict:
            candidate = candidate.subsample_pattern(pattern_descriptor,
                                                    pattern_idx)
        return np.mean(
            compare(candidate, data, method=method, sigma_k=sigma_k))

    scores = np.array([_evaluate(i_rdm) for i_rdm in range(model.n_rdm)],
                      dtype=float)
    return np.argmax(scores)
コード例 #4
0
def eval_fixed(models, data, theta=None, method='cosine'):
    """Evaluate models on data without bootstrapping or cross-validation.

    Each model prediction is compared to every RDM in the data. The
    noise ceiling is taken from ``boot_noise_ceiling`` and the variances
    are the covariance of the per-RDM evaluations across models divided
    by the number of RDMs.

    Args:
        models(list of rsatoolbox.model.Model or list): models to be evaluated
        data(rsatoolbox.rdm.RDMs): data to evaluate on
        theta(numpy.ndarray): parameter vector for the models
        method(string): comparison method to use

    Returns:
        Result: evaluations of shape (1, n_models, n_rdm) together with
            noise ceiling, variances and degrees of freedom

    """
    models, evaluations, theta, _ = input_check_model(models, theta, None, 1)
    n_rdm = data.n_rdm
    # Give every model one slot per data RDM.
    with_rdm_axis = np.expand_dims(evaluations, -1)
    evaluations = np.repeat(with_rdm_axis, n_rdm, -1)
    for k, model in enumerate(models):
        prediction = model.predict_rdm(theta=theta[k])
        evaluations[k] = compare(prediction, data, method)
    evaluations = evaluations.reshape((1, len(models), n_rdm))
    noise_ceil = boot_noise_ceiling(data,
                                    method=method,
                                    rdm_descriptor='index')
    n_eval = evaluations.shape[-1]
    variances = np.cov(evaluations[0], ddof=1) / n_eval
    dof = n_eval - 1
    return Result(models,
                  evaluations,
                  method=method,
                  cv_method='fixed',
                  noise_ceiling=noise_ceil,
                  variances=variances,
                  dof=dof)
コード例 #5
0
def eval_bootstrap_rdm(models,
                       data,
                       theta=None,
                       method='cosine',
                       N=1000,
                       rdm_descriptor='index',
                       boot_noise_ceil=True):
    """Evaluate models on data with a bootstrap over RDMs.

    Draws N bootstrap samples of the data RDMs and evaluates every model
    on each sample to obtain a sampling distribution.

    Args:
        models(rsatoolbox.model.Model or list of these): models to be evaluated
        data(rsatoolbox.rdm.RDMs): data to evaluate on
        theta(numpy.ndarray): parameter vector for the models
        method(string): comparison method to use
        N(int): number of samples
        rdm_descriptor(string): rdm_descriptor to group rdms for bootstrap
        boot_noise_ceil(bool): if True the noise ceiling is recomputed for
            every bootstrap sample and its two bounds are included in the
            covariance estimate; if False the noise ceiling is computed
            once on the complete data and the covariance covers the
            models only.

    Returns:
        Result: bootstrap evaluations of shape (N, n_models) together with
            noise ceiling, variances and degrees of freedom

    """
    models, evaluations, theta, _ = input_check_model(models, theta, None, N)
    noise_min = []
    noise_max = []
    for i in tqdm.trange(N):
        sample, _ = bootstrap_sample_rdm(data, rdm_descriptor)
        for j, mod in enumerate(models):
            rdm_pred = mod.predict_rdm(theta=theta[j])
            evaluations[i, j] = np.mean(compare(rdm_pred, sample, method))
        if boot_noise_ceil:
            noise_min_sample, noise_max_sample = boot_noise_ceiling(
                sample, method=method, rdm_descriptor=rdm_descriptor)
            noise_min.append(noise_min_sample)
            noise_max.append(noise_max_sample)
    # Only bootstrap samples with a finite evaluation enter the covariance;
    # a single non-finite sample would otherwise turn every entry into NaN.
    eval_ok = np.isfinite(evaluations[:, 0])
    if boot_noise_ceil:
        noise_ceil = np.array([noise_min, noise_max])
        # Rows: one per model followed by lower and upper noise ceiling.
        variances = np.cov(
            np.concatenate([evaluations[eval_ok, :].T,
                            noise_ceil[:, eval_ok]]))
    else:
        noise_ceil = np.array(
            boot_noise_ceiling(data,
                               method=method,
                               rdm_descriptor=rdm_descriptor))
        variances = np.cov(evaluations[eval_ok, :].T)
    dof = data.n_rdm - 1
    # NOTE: the covariance computed above is passed on unchanged. It used to
    # be overwritten by np.cov(evaluations.T), which discarded both the
    # finite-sample filter and the noise-ceiling terms.
    result = Result(models,
                    evaluations,
                    method=method,
                    cv_method='bootstrap_rdm',
                    noise_ceiling=noise_ceil,
                    variances=variances,
                    dof=dof)
    return result
コード例 #6
0
ファイル: test_model.py プロジェクト: caiw/pyrsa
 def test_two_rdms_nan(self):
     """Check that the fitting routines agree on a two-RDM model.

     The data are subsampled with pattern index 1 requested twice and
     index 2 left out. For every comparison method the interpolation fit
     must score like the positive weighted fit, and the default weighted
     fit must score like the regression fit (to 4 decimal places).
     """
     from rsatoolbox.model import ModelInterpolate, ModelWeighted
     from rsatoolbox.model.fitter import fit_regress, fit_optimize_positive
     from rsatoolbox.rdm import concat, compare
     rdms = self.rdms.subsample_pattern('index', [0, 1, 1, 3, 4, 5])
     basis_rdms = concat([rdms[0], rdms[1]])
     model_weighted = ModelWeighted('m_weighted', basis_rdms)
     model_interpolate = ModelInterpolate('m_interpolate', basis_rdms)

     def mean_score(theta, metric):
         # All parameter vectors are scored through the weighted model.
         prediction = model_weighted.predict_rdm(theta)
         return np.mean(compare(prediction, rdms, method=metric))

     for i_method in ['cosine', 'corr', 'cosine_cov', 'corr_cov']:
         fitted = [
             model_interpolate.fit(rdms, method=i_method),
             model_weighted.fit(rdms, method=i_method),
             fit_optimize_positive(model_weighted, rdms, method=i_method),
             fit_regress(model_weighted, rdms, method=i_method),
         ]
         score_interp, score_weighted, score_positive, score_regress = [
             mean_score(theta, i_method) for theta in fitted
         ]
         self.assertAlmostEqual(
             score_interp,
             score_positive,
             places=4,
             msg='weighted fit differs from interpolation fit!\nfor %s'
             % i_method)
         self.assertAlmostEqual(
             score_weighted,
             score_regress,
             places=4,
             msg='regression fit differs from optimization fit!\nfor %s'
             % i_method)
コード例 #7
0
def cv_noise_ceiling(rdms,
                     ceil_set,
                     test_set,
                     method='cosine',
                     pattern_descriptor='index'):
    """Calculate the noise ceiling for crossvalidation.

    The upper bound is calculated by pooling all rdms for the appropriate
    patterns in the testsets.
    The lower bound is calculated by using only the appropriate rdms
    from ceil_set for training.
    Both bounds are averaged over the folds.

    Args:
        rdms(rsatoolbox.rdm.RDMs): complete data
        ceil_set(list): a list of the training RDMs with 2-tuple entries:
            (RDMs, pattern_idx)
        test_set(list): a list of the test RDMs with 2-tuple entries:
            (RDMs, pattern_idx)
        method(string): comparison method to use
        pattern_descriptor(string): descriptor to group patterns

    Returns:
        tuple: (lower nc-bound, upper nc-bound)

    """
    assert len(ceil_set) == len(test_set), \
        'ceil_set and test_set must have the same length'
    # Pooling the complete data does not depend on the fold, so it is done
    # once; only the restriction to the test patterns differs per fold.
    pooled_all = pool_rdm(rdms, method=method)
    noise_min = []
    noise_max = []
    for train, test in zip(ceil_set, test_set):
        pred_train = pool_rdm(train[0], method=method)
        pred_train = pred_train.subsample_pattern(by=pattern_descriptor,
                                                  value=test[1])
        pred_test = pooled_all.subsample_pattern(by=pattern_descriptor,
                                                 value=test[1])
        noise_min.append(np.mean(compare(pred_train, test[0], method)))
        noise_max.append(np.mean(compare(pred_test, test[0], method)))
    noise_min = np.mean(np.array(noise_min))
    noise_max = np.mean(np.array(noise_max))
    return noise_min, noise_max
コード例 #8
0
ファイル: test_demo.py プロジェクト: caiw/pyrsa
    def test_temporal_rsa(self):
        """Run through the temporal RSA demo on the MEG sample data.

        Loads the pickled sample data, builds a TemporalDataset, exercises
        splitting / subsetting / binning / conversion, computes RDM movies,
        and compares three categorical model RDMs to the binned data RDMs.
        The test only checks that all steps run without raising.
        """
        import numpy as np
        import matplotlib.pyplot as plt
        import rsatoolbox
        import pickle
        from rsatoolbox.rdm import calc_rdm_movie

        import os
        path = os.path.dirname(os.path.abspath(__file__))
        data_file = os.path.join(path, '..', 'demos', "TemporalSampleData",
                                 "meg_sample_data.pkl")
        # Use a context manager so the file handle is closed after loading.
        with open(data_file, "rb") as handle:
            dat = pickle.load(handle)
        measurements = dat['data']
        cond_names = list(dat['cond_names'].keys())
        cond_idx = dat['cond_idx']
        channel_names = dat['channel_names']
        times = dat['times']
        print(
            'there are %d observations (trials), %d channels, and %d time-points\n'
            % (measurements.shape))
        print('conditions:')
        print(cond_names)

        # plot the condition averages for the first two channels
        fig, ax = plt.subplots(1, 2, figsize=(12, 4))
        ax = ax.flatten()
        for jj, chan in enumerate(channel_names[:2]):
            for ii, cond_ii in enumerate(np.unique(cond_idx)):
                mn = measurements[cond_ii == cond_idx, jj, :].mean(0).squeeze()
                ax[jj].plot(times, mn, label=cond_names[ii])
                ax[jj].set_title(chan)
        # the legend is only added to the last axis that was plotted
        ax[jj].legend()
        tim_des = {'time': times}
        des = {'session': 0, 'subj': 0}
        obs_des = {'conds': cond_idx}
        chn_des = {'channels': channel_names}
        data = rsatoolbox.data.TemporalDataset(measurements,
                                               descriptors=des,
                                               obs_descriptors=obs_des,
                                               channel_descriptors=chn_des,
                                               time_descriptors=tim_des)
        data.sort_by('conds')
        print('shape of original measurements')
        print(data.measurements.shape)
        data_split_time = data.split_time('time')
        print('\nafter splitting')
        print(len(data_split_time))
        print(data_split_time[0].measurements.shape)
        print('shape of original measurements')
        print(data.measurements.shape)
        data_subset_time = data.subset_time('time', t_from=-.1, t_to=.5)
        print('\nafter subsetting')
        print(data_subset_time.measurements.shape)
        print(data_subset_time.time_descriptors['time'][0])
        # bins of two consecutive time points each
        bins = np.reshape(tim_des['time'], [-1, 2])
        print(len(bins))
        print(bins[0])
        print('shape of original measurements')
        print(data.measurements.shape)
        data_binned = data.bin_time('time', bins=bins)
        print('\nafter binning')
        print(data_binned.measurements.shape)
        print(data_binned.time_descriptors['time'][0])
        print('shape of original measurements')
        print(data.measurements.shape)
        data_dataset = data.convert_to_dataset('time')
        print('\nafter binning')
        print(data_dataset.measurements.shape)
        print(data_dataset.obs_descriptors['time'][0])
        rdms_data = calc_rdm_movie(data,
                                   method='euclidean',
                                   descriptor='conds')
        print(rdms_data)
        rdms_data_binned = calc_rdm_movie(data,
                                          method='euclidean',
                                          descriptor='conds',
                                          bins=bins)
        print(rdms_data_binned)
        plt.figure(figsize=(10, 15))
        # add formatted time as rdm_descriptor
        rdms_data_binned.rdm_descriptors['time_formatted'] = [
            '%0.0f ms' % (np.round(x * 1000, 2))
            for x in rdms_data_binned.rdm_descriptors['time']
        ]

        rsatoolbox.vis.show_rdm(rdms_data_binned,
                                pattern_descriptor='conds',
                                rdm_descriptor='time_formatted')
        from rsatoolbox.rdm import get_categorical_rdm
        rdms_model_in = get_categorical_rdm(['%d' % x for x in range(4)])
        rdms_model_lr = get_categorical_rdm(['l', 'r', 'l', 'r'])
        rdms_model_av = get_categorical_rdm(['a', 'a', 'v', 'v'])
        model_names = ['independent', 'left/right', 'audio/visual']
        # append in one RDMs object
        model_rdms = rdms_model_in
        model_rdms.append(rdms_model_lr)
        model_rdms.append(rdms_model_av)
        model_rdms.rdm_descriptors['model_names'] = model_names
        model_rdms.pattern_descriptors['cond_names'] = cond_names
        plt.figure(figsize=(10, 10))
        rsatoolbox.vis.show_rdm(model_rdms,
                                rdm_descriptor='model_names',
                                pattern_descriptor='cond_names')
        from rsatoolbox.rdm import compare
        # compare every model RDM to the binned data RDMs
        r = []
        for mod in model_rdms:
            r.append(compare(mod, rdms_data_binned, method='cosine'))
        for i, r_ in enumerate(r):
            plt.plot(rdms_data_binned.rdm_descriptors['time'],
                     r_.squeeze(),
                     label=model_names[i])
        plt.xlabel('time')
        plt.ylabel('model-data cosine similarity')
        plt.legend()
コード例 #9
0
def crossval(models,
             rdms,
             train_set,
             test_set,
             ceil_set=None,
             method='cosine',
             fitter=None,
             pattern_descriptor='index',
             calc_noise_ceil=True):
    """Evaluate models on cross-validation sets.

    For every fold each model is fitted on the training RDMs and its
    prediction, restricted to the test patterns, is compared to the test
    RDMs. Folds whose training or test set contains no RDMs or at most
    two conditions yield NaN evaluations.

    Args:
        models(rsatoolbox.model.Model): models to be evaluated
        rdms(rsatoolbox.rdm.RDMs): full dataset
        train_set(list): a list of the training RDMs with 2-tuple entries:
            (RDMs, pattern_idx)
        test_set(list): a list of the test RDMs with 2-tuple entries:
            (RDMs, pattern_idx)
        ceil_set(list, optional): sets used for the lower noise-ceiling
            bound, same length as test_set. If None, the noise ceiling is
            computed per evaluated fold on the test patterns of the full
            dataset.
        method(string): comparison method to use
        fitter: fitting function(s), passed through input_check_model
        pattern_descriptor(string): descriptor to group patterns
        calc_noise_ceil(bool): if False the noise ceiling is set to NaN

    Returns:
        Result: evaluations of shape (1, n_models, n_folds) and the
            noise ceiling

    """
    assert len(train_set) == len(test_set), \
        'train_set and test_set must have the same length'
    if ceil_set is not None:
        assert len(ceil_set) == len(test_set), \
            'ceil_set and test_set must have the same length'
    if isinstance(models, Model):
        models = [models]
    n_folds = len(train_set)
    fold_evals = []
    fold_ceilings = []
    for i_fold in range(n_folds):
        fold_train = train_set[i_fold]
        fold_test = test_set[i_fold]
        unusable = (fold_train[0].n_rdm == 0
                    or fold_test[0].n_rdm == 0
                    or fold_train[0].n_cond <= 2
                    or fold_test[0].n_cond <= 2)
        if unusable:
            scores = np.full(len(models), np.nan)
        else:
            models, scores, _, fitter = \
                input_check_model(models, None, fitter)
            for j, model in enumerate(models):
                fitted_theta = fitter[j](
                    model,
                    fold_train[0],
                    method=method,
                    pattern_idx=fold_train[1],
                    pattern_descriptor=pattern_descriptor)
                prediction = model.predict_rdm(fitted_theta)
                prediction = prediction.subsample_pattern(
                    by=pattern_descriptor, value=fold_test[1])
                scores[j] = np.mean(compare(prediction, fold_test[0], method))
            if ceil_set is None and calc_noise_ceil:
                # Without explicit ceiling sets, use the full data
                # restricted to this fold's test patterns.
                test_patterns = rdms.subsample_pattern(
                    by=pattern_descriptor, value=fold_test[1])
                fold_ceilings.append(
                    boot_noise_ceiling(test_patterns, method=method))
        fold_evals.append(scores)
    # transpose to switch models/set order
    evaluations = np.array(fold_evals).T
    evaluations = evaluations.reshape((1, len(models), n_folds))
    if not calc_noise_ceil:
        noise_ceil = np.array([np.nan, np.nan])
    elif ceil_set is not None:
        noise_ceil = cv_noise_ceiling(rdms,
                                      ceil_set,
                                      test_set,
                                      method=method,
                                      pattern_descriptor=pattern_descriptor)
    else:
        noise_ceil = np.array(fold_ceilings).T

    return Result(models,
                  evaluations,
                  method=method,
                  cv_method='crossvalidation',
                  noise_ceiling=noise_ceil)