def md_with_dataframe():
    """Return a ``ModelData`` whose ``df`` is a small survey-response table.

    The table has one row per (person, survey) pair: four people answering
    question ``'CSI1'`` in surveys ``'1415MYS'`` and ``'1415EYS'``, all with
    ``Corps`` set to ``'1st year'``.
    """
    columns = ['person_id', 'question_code', 'survey_code', 'response',
               'Corps']
    person_ids = (3554131, 3490339, 3557769, 3013575)
    # Responses are listed in the same order as ``person_ids``.
    responses_by_survey = (
        ('1415MYS', (5, 3, 3, 5)),
        ('1415EYS', (6, 6, 6, 5)),
    )
    rows = [
        (person_id, 'CSI1', survey_code, response, '1st year')
        for survey_code, responses in responses_by_survey
        for person_id, response in zip(person_ids, responses)
    ]

    model_data = ModelData()
    model_data.df = pd.DataFrame.from_records(rows, columns=columns)
    return model_data
Exemple #2
0
def fit(cache,
        eid_set=None,
        experiments_exclude=None,
        high_res=False,
        threshold_injection=True):
    """Fit a ``HomogeneousModel`` on regional data and tabulate its weights.

    Regional data rooted at ``ROOT_ID`` is fetched twice with the same
    filters: once with ``projection_hemisphere_id=2`` (labelled ``'ipsi'``
    in the result) and once with ``projection_hemisphere_id=1`` (labelled
    ``'contra'``). The model is fit from the ipsi injections to the
    horizontally stacked ipsi and contra projections.

    Args:
        cache: Passed through to ``ModelData`` and
            ``get_summary_structure_ids``.
        eid_set: Forwarded to ``get_regional_data``.
        experiments_exclude: Forwarded to ``get_regional_data``. ``None``
            (the default) is treated as an empty list.
        high_res: Forwarded to ``get_regional_data``.
        threshold_injection: Forwarded to ``get_regional_data``.

    Returns:
        A ``pandas.DataFrame`` indexed by injection region whose columns are
        grouped under the top-level keys ``'ipsi'`` and ``'contra'``.
    """
    # A fresh list per call instead of one list shared by every call.
    if experiments_exclude is None:
        experiments_exclude = []

    logging.debug('getting data')
    shared_kwargs = dict(eid_set=eid_set,
                         experiments_exclude=experiments_exclude,
                         high_res=high_res,
                         threshold_injection=threshold_injection)
    ipsi_data = ModelData(cache, ROOT_ID).get_regional_data(
        projection_hemisphere_id=2, **shared_kwargs)
    contra_data = ModelData(cache, ROOT_ID).get_regional_data(
        projection_hemisphere_id=1, **shared_kwargs)

    X = ipsi_data.injections
    y = np.hstack((ipsi_data.projections, contra_data.projections))

    logging.debug('fitting')
    reg = HomogeneousModel(kappa=np.inf)
    reg.fit(X, y)

    # get ids
    ss_ids = get_summary_structure_ids(cache)
    injection_key = ipsi_data.injection_mask.get_key(structure_ids=ss_ids,
                                                     hemisphere_id=2)
    ipsi_key = ipsi_data.projection_mask.get_key(structure_ids=ss_ids,
                                                 hemisphere_id=2)
    contra_key = contra_data.projection_mask.get_key(structure_ids=ss_ids,
                                                     hemisphere_id=1)

    injection_regions = nonzero_unique(injection_key)
    ipsi_regions = nonzero_unique(ipsi_key)
    contra_regions = nonzero_unique(contra_key)

    # ``y`` was stacked ipsi-first, so the leading weight columns are split
    # off as ipsi and the remainder as contra.
    # NOTE(review): assumes the number of ipsi projection columns equals
    # len(ipsi_regions) - confirm against get_regional_data.
    n_ipsi = len(ipsi_regions)
    ipsi_w = pd.DataFrame(data=reg.weights[:, :n_ipsi],
                          index=injection_regions,
                          columns=ipsi_regions)
    contra_w = pd.DataFrame(data=reg.weights[:, n_ipsi:],
                            index=injection_regions,
                            columns=contra_regions)

    return pd.concat((ipsi_w, contra_w), keys=('ipsi', 'contra'), axis=1)
Exemple #3
0
def fit_structure(cache,
                  structure_id,
                  experiments_exclude,
                  kernel=None,
                  model_option='standard'):
    """Cross-validate a voxel model for one structure and report its optimum.

    Voxel data for ``structure_id`` is loaded (minus ``experiments_exclude``),
    ``VoxelModelError.single_cv`` is run with ``option=model_option``, and
    the selected hyperparameters are logged at debug level and returned.

    Returns:
        ``{'shape': ..., 'support': ...}`` when ``kernel == 'polynomial'``,
        otherwise ``{'gamma': ...}``.
    """
    voxel_data = ModelData(cache, structure_id).get_voxel_data(
        experiments_exclude=experiments_exclude)

    # nested cross val
    logging.debug("Performing cross validation: (%d samples, %d vars)",
                  *voxel_data.projections.shape)
    reg = VoxelModelError(cache, voxel_data, kernel=kernel).single_cv(
        option=model_option)
    logging.debug("score          : %.2f", reg.best_score_)

    if kernel == 'polynomial':
        results = {'shape': reg.kernel.shape, 'support': reg.kernel.support}
        logging.debug("optimal shape  : %.0f", results['shape'])
        logging.debug("optimal support: %.0f", results['support'])
        return results

    gamma = reg.gamma
    logging.debug("optimal gamma  : %.3f", gamma)
    # Also log sigma = 1 / sqrt(gamma) alongside gamma.
    logging.debug("(optimal sigma : %.3f)", 1 / sqrt(gamma))
    return {'gamma': gamma}
Exemple #4
0
def run_structure(cache, structure_id, kernel=None, cv=None, eid_set=None,
                  experiments_exclude=None, error_option='standard'):
    """Run ``VoxelModelError`` on one structure's voxel data; return scores.

    Args:
        cache: Passed through to ``ModelData`` and ``VoxelModelError``.
        structure_id: Structure whose voxel data is loaded.
        kernel: Forwarded to ``VoxelModelError``.
        cv: Forwarded to ``VoxelModelError``.
        eid_set: Forwarded to ``get_voxel_data``.
        experiments_exclude: Forwarded to ``get_voxel_data``. ``None`` (the
            default) is treated as an empty list.
        error_option: Passed as ``option`` to ``VoxelModelError.run``.

    Returns:
        The ``scores`` attribute of the ``VoxelModelError`` after ``run``.
    """
    # A fresh list per call instead of one list shared by every call.
    if experiments_exclude is None:
        experiments_exclude = []

    data = ModelData(cache, structure_id).get_voxel_data(
        eid_set=eid_set, experiments_exclude=experiments_exclude)

    # nested cross val
    logging.debug("Performing nested cross validation: (%d samples, %d vars)",
                  *data.projections.shape)
    error = VoxelModelError(cache, data, cv=cv, kernel=kernel)
    error.run(option=error_option)

    return error.scores
Exemple #5
0
def run_structure(cache, structure_id, cv=None, eid_set=None,
                  experiments_exclude=None, high_res=False,
                  threshold_injection=True):
    """Run ``HomogeneousModelError`` on one structure's regional data.

    Args:
        cache: Passed through to ``ModelData`` and ``HomogeneousModelError``.
        structure_id: Structure whose regional data is loaded.
        cv: Forwarded to ``HomogeneousModelError``.
        eid_set: Forwarded to ``get_regional_data``.
        experiments_exclude: Forwarded to ``get_regional_data``. ``None``
            (the default) is treated as an empty list.
        high_res: Forwarded to ``get_regional_data``.
        threshold_injection: Forwarded to ``get_regional_data``.

    Returns:
        The ``scores`` attribute of the ``HomogeneousModelError`` after
        ``run``.
    """
    # A fresh list per call instead of one list shared by every call.
    if experiments_exclude is None:
        experiments_exclude = []

    data = ModelData(cache, structure_id).get_regional_data(
        eid_set=eid_set, experiments_exclude=experiments_exclude,
        high_res=high_res, threshold_injection=threshold_injection)

    # nested cross val
    # Lazy logging args: the message is only formatted when DEBUG is enabled.
    logging.debug("Performing nested cross validation: "
                  "(%s samples, %s vars)", *data.projections.shape)
    error = HomogeneousModelError(cache, data, cv=cv)
    error.run()

    return error.scores
Exemple #6
0
def fit_structure(cache,
                  structure_id,
                  experiments_exclude,
                  kernel_params,
                  model_option='standard'):
    """Fit a voxel model for one structure with pre-selected kernel settings.

    Args:
        cache: Passed through to ``ModelData`` and ``VoxelModelError``.
        structure_id: Structure whose voxel data is loaded.
        experiments_exclude: Forwarded to ``get_voxel_data``.
        kernel_params: Mapping of kernel hyperparameters. If it contains
            ``'shape'`` the whole mapping is expanded into ``Polynomial``;
            otherwise it must contain ``'gamma'`` and an ``'rbf'`` kernel is
            used. The mapping is not modified.
        model_option: Passed as ``option`` to ``VoxelModelError.fit``.

    Returns:
        A ``(data, fitted)`` tuple: the loaded voxel data and whatever
        ``VoxelModelError.fit`` returns.

    Raises:
        KeyError: If ``kernel_params`` has neither ``'shape'`` nor
            ``'gamma'``.
    """
    data = ModelData(
        cache,
        structure_id).get_voxel_data(experiments_exclude=experiments_exclude)

    # Translate the stored hyperparameters into estimator keyword arguments.
    nw_kwargs = dict()
    if 'shape' in kernel_params:
        nw_kwargs['kernel'] = Polynomial(**kernel_params)
    else:
        nw_kwargs['kernel'] = 'rbf'
        # Read rather than pop so the caller's mapping is left intact.
        nw_kwargs['gamma'] = kernel_params['gamma']

    error = VoxelModelError(cache, data)
    return data, error.fit(**nw_kwargs, option=model_option)
def run_structure(cache,
                  structure_id,
                  eid_set=None,
                  experiments_exclude=None,
                  error_option='standard',
                  **nw_kwargs):
    """Cross-validate a ``NadarayaWatson`` estimator on one structure.

    Voxel data for ``structure_id`` is loaded and ``cross_validate`` is run
    with a ``LeaveOneOut`` splitter, using ``data.centroids`` as ``X`` and
    ``data.projections`` as ``y``.

    Args:
        cache: Passed through to ``ModelData`` and ``LogHybridScorer``.
        structure_id: Structure whose voxel data is loaded.
        eid_set: Forwarded to ``get_voxel_data``.
        experiments_exclude: Forwarded to ``get_voxel_data``. ``None`` (the
            default) is treated as an empty list.
        error_option: Accepted for interface compatibility; not used by this
            function.
        **nw_kwargs: Forwarded to the ``NadarayaWatson`` constructor.

    Returns:
        The value returned by ``cross_validate`` (train scores included).
    """
    # A fresh list per call instead of one list shared by every call.
    if experiments_exclude is None:
        experiments_exclude = []

    data = ModelData(cache, structure_id).get_voxel_data(
        eid_set=eid_set, experiments_exclude=experiments_exclude)

    # cross val
    logging.debug("Performing cross validation: (%d samples, %d vars)",
                  *data.projections.shape)
    scoring_dict = LogHybridScorer(cache).scoring_dict
    reg = NadarayaWatson(**nw_kwargs)
    scores = cross_validate(reg,
                            X=data.centroids,
                            y=data.projections,
                            cv=LeaveOneOut(),
                            scoring=scoring_dict,
                            return_train_score=True,
                            n_jobs=-1)
    return scores