Example 1
# Shared imports for the excerpts below (module paths follow the rsatoolbox
# source tree; treat the exact locations as assumptions of this listing):
import numpy as np
import tqdm
from rsatoolbox.model import Model
from rsatoolbox.rdm import compare
from rsatoolbox.util.inference_util import input_check_model
from rsatoolbox.inference.result import Result
from rsatoolbox.inference.bootstrap import bootstrap_sample, bootstrap_sample_rdm
from rsatoolbox.inference.noise_ceiling import boot_noise_ceiling, cv_noise_ceiling


def eval_fixed(models, data, theta=None, method='cosine'):
    """evaluates models on data, without any bootstrapping or
    cross-validation

    Args:
        models(rsatoolbox.model.Model or list of these): models to be evaluated
        data(rsatoolbox.rdm.RDMs): data to evaluate on
        theta(numpy.ndarray): parameter vector for the models
        method(string): comparison method to use

    Returns:
        Result: evaluation results

    """
    models, evaluations, theta, _ = input_check_model(models, theta, None, 1)
    # expand to one evaluation slot per model and per data RDM
    evaluations = np.repeat(np.expand_dims(evaluations, -1), data.n_rdm, -1)
    for k, model in enumerate(models):
        rdm_pred = model.predict_rdm(theta=theta[k])
        evaluations[k] = compare(rdm_pred, data, method)
    evaluations = evaluations.reshape((1, len(models), data.n_rdm))
    noise_ceil = boot_noise_ceiling(data,
                                    method=method,
                                    rdm_descriptor='index')
    # variance of the mean evaluation, estimated across data RDMs
    variances = np.cov(evaluations[0], ddof=1) \
        / evaluations.shape[-1]
    dof = evaluations.shape[-1] - 1
    result = Result(models,
                    evaluations,
                    method=method,
                    cv_method='fixed',
                    noise_ceiling=noise_ceil,
                    variances=variances,
                    dof=dof)
    return result
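
A minimal usage sketch for eval_fixed, assuming only rsatoolbox's public RDMs and ModelFixed constructors; the random toy data and the names data_rdms and model are illustrative, not part of the excerpt:

import numpy as np
from rsatoolbox.rdm import RDMs
from rsatoolbox.model import ModelFixed

rng = np.random.default_rng(0)
# toy data: 5 measured RDMs over 6 conditions (6 * 5 / 2 = 15 dissimilarities)
data_rdms = RDMs(rng.random((5, 15)))
# a fixed model whose prediction is simply the first measured RDM
model = ModelFixed('toy_model', data_rdms.get_vectors()[0])

result = eval_fixed([model], data_rdms, method='cosine')
print(result.evaluations.shape)  # (1, n_models, n_rdms) = (1, 1, 5)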
Example 2
def eval_bootstrap_rdm(models,
                       data,
                       theta=None,
                       method='cosine',
                       N=1000,
                       rdm_descriptor='index',
                       boot_noise_ceil=True):
    """evaluates models on data
    performs bootstrapping to get a sampling distribution

    Args:
        models(rsatoolbox.model.Model or list of these): models to be evaluated
        data(rsatoolbox.rdm.RDMs): data to evaluate on
        theta(numpy.ndarray): parameter vector for the models
        method(string): comparison method to use
        N(int): number of samples
        rdm_descriptor(string): rdm_descriptor to group rdms for bootstrap
        boot_noise_ceil(bool): whether to estimate the noise ceiling on each
            bootstrap sample (True) or once on the full data (False)

    Returns:
        Result: evaluation results over bootstrap samples

    """
    models, evaluations, theta, _ = input_check_model(models, theta, None, N)
    noise_min = []
    noise_max = []
    for i in tqdm.trange(N):
        sample, rdm_idx = bootstrap_sample_rdm(data, rdm_descriptor)
        for j, mod in enumerate(models):
            rdm_pred = mod.predict_rdm(theta=theta[j])
            evaluations[i, j] = np.mean(compare(rdm_pred, sample, method))
        if boot_noise_ceil:
            noise_min_sample, noise_max_sample = boot_noise_ceiling(
                sample, method=method, rdm_descriptor=rdm_descriptor)
            noise_min.append(noise_min_sample)
            noise_max.append(noise_max_sample)
    # exclude bootstrap samples that produced non-finite evaluations
    eval_ok = np.isfinite(evaluations[:, 0])
    if boot_noise_ceil:
        noise_ceil = np.array([noise_min, noise_max])
        variances = np.cov(np.concatenate(
            [evaluations[eval_ok, :].T, noise_ceil[:, eval_ok]]))
    else:
        noise_ceil = np.array(
            boot_noise_ceiling(data,
                               method=method,
                               rdm_descriptor=rdm_descriptor))
        variances = np.cov(evaluations[eval_ok, :].T)
    dof = data.n_rdm - 1
    result = Result(models,
                    evaluations,
                    method=method,
                    cv_method='bootstrap_rdm',
                    noise_ceiling=noise_ceil,
                    variances=variances,
                    dof=dof)
    return result
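
A sketch of the grouped bootstrap, again assuming the public constructors; the 'subject' descriptor and the toy sizes are made up for illustration. Grouping by a descriptor resamples whole subjects rather than individual RDMs:

import numpy as np
from rsatoolbox.rdm import RDMs
from rsatoolbox.model import ModelFixed

rng = np.random.default_rng(0)
# 10 toy RDMs: 5 subjects x 2 sessions, 6 conditions each
data_rdms = RDMs(
    rng.random((10, 15)),
    rdm_descriptors={'subject': np.repeat(np.arange(5), 2)})
model = ModelFixed('toy_model', data_rdms.get_vectors()[0])

result = eval_bootstrap_rdm([model], data_rdms, N=200,
                            rdm_descriptor='subject')
print(result.evaluations.shape)  # (N, n_models) = (200, 1)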
Example 3
def bootstrap_testset(models, data, method='cosine', fitter=None, N=1000,
                      pattern_descriptor=None, rdm_descriptor=None):
    """takes a bootstrap sample and evaluates on the rdms and patterns not
    sampled
    also returns the size of each test_set to allow later weighting
    or selection if this is desired.

    Args:
        models(rsatoolbox.model.Model): Models to be evaluated
        data(rsatoolbox.rdm.RDMs): RDM data to use
        method(string): comparison method to use
        fitter(function): fitting function
        pattern_descriptor(string): descriptor to group patterns
        rdm_descriptor(string): descriptor to group rdms

    Returns:
        numpy.ndarray: evaluations, one row per bootstrap sample (shape (N, n_models))
        numpy.ndarray: n_rdm for each test_set
        numpy.ndarray: n_pattern for each test_set

    """
    models, evaluations, _, fitter = input_check_model(models, None, fitter, N)
    n_rdm = np.zeros(N, dtype=int)
    n_pattern = np.zeros(N, dtype=int)
    if pattern_descriptor is None:
        data.pattern_descriptors['index'] = np.arange(data.n_cond)
        pattern_descriptor = 'index'
    if rdm_descriptor is None:
        data.rdm_descriptors['index'] = np.arange(data.n_rdm)
        rdm_descriptor = 'index'
    for i_sample in range(N):
        sample, rdm_idx, pattern_idx = bootstrap_sample(
            data,
            rdm_descriptor=rdm_descriptor,
            pattern_descriptor=pattern_descriptor)
        train_set = [[sample, pattern_idx]]
        rdm_idx_test = data.rdm_descriptors[rdm_descriptor]
        rdm_idx_test = np.setdiff1d(rdm_idx_test, rdm_idx)
        pattern_idx_test = data.pattern_descriptors[pattern_descriptor]
        pattern_idx_test = np.setdiff1d(pattern_idx_test, pattern_idx)
        if len(pattern_idx_test) >= 3 and len(rdm_idx_test) >= 1:
            rdms_test = data.subsample_pattern(pattern_descriptor,
                                               pattern_idx_test)
            rdms_test = rdms_test.subsample(rdm_descriptor, rdm_idx_test)
            test_set = [[rdms_test, pattern_idx_test]]
            # evaluations of all models for this single CV fold
            evaluations[i_sample] = crossval(
                models, data, train_set, test_set,
                method=method, fitter=fitter,
                pattern_descriptor=pattern_descriptor).evaluations[0, :, 0]
        else:
            evaluations[i_sample] = np.nan
        n_rdm[i_sample] = len(rdm_idx_test)
        n_pattern[i_sample] = len(pattern_idx_test)
    return evaluations, n_rdm, n_pattern
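
A sketch of how the returned test-set sizes can be used downstream, as the docstring suggests; the weighting scheme shown (product of the two test-set sizes) is one plausible choice, not prescribed by the source:

import numpy as np
from rsatoolbox.rdm import RDMs
from rsatoolbox.model import ModelFixed

rng = np.random.default_rng(0)
data_rdms = RDMs(rng.random((10, 15)))  # 10 toy RDMs over 6 conditions
model = ModelFixed('toy_model', data_rdms.get_vectors()[0])

evals, n_rdm_test, n_pattern_test = bootstrap_testset(
    [model], data_rdms, method='cosine', N=500)
ok = np.isfinite(evals[:, 0])  # samples whose test set was too small are NaN
weighted_mean = np.average(
    evals[ok], axis=0, weights=n_rdm_test[ok] * n_pattern_test[ok])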
Example 4
def crossval(models,
             rdms,
             train_set,
             test_set,
             ceil_set=None,
             method='cosine',
             fitter=None,
             pattern_descriptor='index',
             calc_noise_ceil=True):
    """evaluates models on cross-validation sets

    Args:
        models(rsatoolbox.model.Model or list of these): models to be evaluated
        rdms(rsatoolbox.rdm.RDMs): full dataset
        train_set(list): a list of the training RDMs with 2-tuple entries:
            (RDMs, pattern_idx)
        test_set(list): a list of the test RDMs with 2-tuple entries:
            (RDMs, pattern_idx)
        ceil_set(list): an optional list of RDM sets for a cross-validated
            noise ceiling, with the same structure as train_set
        method(string): comparison method to use
        fitter(function): fitting function
        pattern_descriptor(string): descriptor to group patterns
        calc_noise_ceil(bool): whether to compute a noise ceiling

    Returns:
        Result: evaluation results for the cross-validation folds

    """
    assert len(train_set) == len(test_set), \
        'train_set and test_set must have the same length'
    if ceil_set is not None:
        assert len(ceil_set) == len(test_set), \
            'ceil_set and test_set must have the same length'
    if isinstance(models, Model):
        models = [models]
    evaluations = []
    noise_ceil = []
    for i in range(len(train_set)):
        train = train_set[i]
        test = test_set[i]
        if (train[0].n_rdm == 0 or test[0].n_rdm == 0 or train[0].n_cond <= 2
                or test[0].n_cond <= 2):
            evals = np.full(len(models), np.nan)
        else:
            models, evals, _, fitter = \
                input_check_model(models, None, fitter)
            for j, model in enumerate(models):
                theta = fitter[j](model,
                                  train[0],
                                  method=method,
                                  pattern_idx=train[1],
                                  pattern_descriptor=pattern_descriptor)
                pred = model.predict_rdm(theta)
                pred = pred.subsample_pattern(by=pattern_descriptor,
                                              value=test[1])
                evals[j] = np.mean(compare(pred, test[0], method))
            if ceil_set is None and calc_noise_ceil:
                # per-fold noise ceiling, computed on the test patterns
                noise_ceil.append(boot_noise_ceiling(
                    rdms.subsample_pattern(by=pattern_descriptor,
                                           value=test[1]),
                    method=method))
        evaluations.append(evals)
    evaluations = np.array(evaluations).T  # .T to switch models/set order
    evaluations = evaluations.reshape((1, len(models), len(train_set)))
    if ceil_set is not None and calc_noise_ceil:
        noise_ceil = cv_noise_ceiling(rdms,
                                      ceil_set,
                                      test_set,
                                      method=method,
                                      pattern_descriptor=pattern_descriptor)
    elif calc_noise_ceil:
        noise_ceil = np.array(noise_ceil).T
    else:
        noise_ceil = np.array([np.nan, np.nan])

    result = Result(models,
                    evaluations,
                    method=method,
                    cv_method='crossvalidation',
                    noise_ceiling=noise_ceil)
    return result
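
A sketch of a single hand-built cross-validation fold, using only the subsample_pattern calls already shown in the excerpts; the split and the toy data are illustrative, and real use would typically generate many matched folds:

import numpy as np
from rsatoolbox.rdm import RDMs
from rsatoolbox.model import ModelFixed

rng = np.random.default_rng(0)
data_rdms = RDMs(rng.random((10, 15)))  # 10 toy RDMs over 6 conditions
model = ModelFixed('toy_model', data_rdms.get_vectors()[0])

# train on conditions 0-2, test on conditions 3-5 (both sides need > 2)
train_patterns, test_patterns = np.arange(3), np.arange(3, 6)
train_set = [(data_rdms.subsample_pattern('index', train_patterns),
              train_patterns)]
test_set = [(data_rdms.subsample_pattern('index', test_patterns),
             test_patterns)]
result = crossval([model], data_rdms, train_set, test_set, method='cosine')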