Beispiel #1
0
def validate_ladder(config, df, y, train_indexes, val_indexes, sub_name, name):
    """Train a ladder network on one split and score it on the validation rows.

    Args:
        config: Dict of settings. Must contain ``'x_features'`` (column
            selector applied to ``df``) and ``'binary'`` (truthy to binarize
            the targets before training). It must be JSON-serializable, since
            it is stored verbatim in the returned score. The caller's dict is
            not modified.
        df: Object indexed with ``config['x_features']`` whose ``.values`` is
            cast to float -- presumably a pandas DataFrame.
        y: Targets; must support ``.copy()`` and indexing by ``val_indexes``.
        train_indexes: Training row indexes forwarded to ``train_ladder``.
        val_indexes: Validation row indexes forwarded to ``train_ladder`` and
            used to select the ground truth for scoring.
        sub_name: Sub-directory component of the output paths.
        name: Run name component of the output paths.

    Returns:
        Dict with ``'config'`` (JSON dump of ``config``), ``'error'`` (``None``
        or the message of the exception raised during training) and whatever
        ``create_score_dict`` returns for the binarized truth/prediction pair.

    Side effects:
        Writes the score to
        ``./results/ladder/<sub_name>/<name>/score.csv``.
    """
    _config = config.copy()
    # Builtin ``float`` is what the ``np.float`` alias pointed to; the alias
    # itself no longer exists in recent NumPy releases.
    X = df[_config.pop('x_features')].values.astype(float)
    binary = _config.pop('binary')
    score = {
        'config': json.dumps(config),
        'error': None,
    }
    _y = binarize_y(y) if binary else y.copy()

    try:
        # Pass the stripped copy (without 'x_features'/'binary'), matching the
        # way cv_ladders calls train_ladder for the final model.
        res, _ = train_ladder(
            _config,
            dataset={
                'ovadataset': OvaDataset(X, _y),
                'train_indexes': train_indexes,
                'val_indexes': val_indexes,
            },
            save_to='ladder/{}/{}'.format(sub_name, name)
        )
    except Exception as e:
        # Best effort: a failed run is still scored. An all-zero matrix makes
        # argmax pick column 0 for every validation row.
        # NOTE(review): the 3 columns presumably match the number of classes
        # train_ladder predicts -- confirm.
        res = np.zeros((len(y[val_indexes]), 3))
        score['error'] = str(e)

    score.update(create_score_dict(
        binarize_y(y[val_indexes]), binarize_y(res.argmax(axis=1)),
    ))
    pd.Series(score).to_csv("./results/ladder/{}/{}/score.csv".format(sub_name, name))
    return score
Beispiel #2
0
def cv_nn(indexes, grid, pred_function=pred_ann, df=df, y=y):
    """Run nested cross-validation over a parameter grid and score each fold.

    For every outer fold, each grid configuration is validated on each nested
    split in parallel via ``validate_ann``. The per-split scores are written
    to ``fold_dl_<idx>_scores.csv``, averaged per configuration, and the best
    configuration (lowest ``cost_matrix``, then highest ``SEN``, then highest
    ``NPV``) is used to predict the fold's test rows.

    Relies on the module-level names ``seed``, ``y_bin``, ``N_CORES`` and
    ``RESULTS_DIRECTORY``.

    Args:
        indexes: Iterable of outer folds; each fold is indexed with
            ``'train'``, ``'test'`` and ``'nested_indexes'``, and every nested
            entry with ``'train'`` and ``'val'``.
        grid: Parameter grid handed to ``ParameterGrid``.
        pred_function: Callable invoked as
            ``pred_function(df, train_indexes, test_indexes, config)``.
        df: Feature data forwarded to ``validate_ann`` and ``pred_function``.
        y: Targets forwarded to ``validate_ann``.

    Returns:
        DataFrame with one row of test scores per outer fold; the same table
        is written to ``<RESULTS_DIRECTORY>/dl_all.csv``.
    """
    # The grid expansion does not depend on the fold, so build it once.
    configs = list(ParameterGrid(grid))
    test_scores = []
    for idx, fold in enumerate(indexes):
        nested_cv_results = Parallel(n_jobs=N_CORES)(
            delayed(validate_ann)(
                config, df, y,
                nested_fold['train'],
                nested_fold['val'],
                pred_function=pred_function,
                seed=seed,
            )
            for config, nested_fold in
            product(configs, fold['nested_indexes'])
        )
        df_scores = pd.DataFrame(nested_cv_results)
        df_scores.to_csv(
            '{}/fold_dl_{}_scores.csv'.format(RESULTS_DIRECTORY, idx), index=False
        )
        sorted_configs = df_scores.groupby('config').mean().sort_values(
            ['cost_matrix', 'SEN', 'NPV'], ascending=[True, False, False]
        )
        # The group key is the serialized config string; parse it back into
        # a dict for the final prediction on this fold.
        config = yaml.safe_load(sorted_configs.index[0])
        np.random.seed(seed)
        pred = pred_function(
            df, fold['train'], fold['test'], config
        )
        # NOTE(review): the ground truth comes from the module-level y_bin,
        # not from the ``y`` argument -- confirm they describe the same data.
        test_scores.append(
            create_score_dict(y_bin[fold['test']], binarize_y(pred))
        )
        # Single-argument call form prints identically on Python 2 and 3.
        print(test_scores[-1])
    results = pd.DataFrame(test_scores)
    results.to_csv('{}/dl_all.csv'.format(RESULTS_DIRECTORY), index=False)
    return results
Beispiel #3
0
def validate_ann(
    config, df, y, train_indexes, val_indexes, pred_function, seed=1
):
    """Score one configuration of a prediction function on one split.

    Args:
        config: JSON-serializable settings passed to ``pred_function`` and
            stored, serialized, under ``'config'`` in the result.
        df: Feature data handed to ``pred_function``.
        y: Targets; indexed with ``val_indexes`` to get the ground truth.
        train_indexes: Training row indexes for ``pred_function``.
        val_indexes: Validation row indexes for ``pred_function``.
        pred_function: Callable invoked as
            ``pred_function(df, train_indexes, val_indexes, config)``.
        seed: Value used to seed NumPy's global random state.

    Returns:
        Dict holding the serialized config plus the entries produced by
        ``create_score_dict`` for the binarized truth and predictions.
    """
    # Fix NumPy's global RNG first so repeated runs are reproducible.
    np.random.seed(seed)

    # Serialize before predicting, exactly as the result must record the
    # configuration as it was handed in.
    serialized_config = json.dumps(config)
    predictions = pred_function(df, train_indexes, val_indexes, config)

    expected = binarize_y(y[val_indexes])
    predicted = binarize_y(predictions)

    score = {'config': serialized_config}
    score.update(create_score_dict(expected, predicted))
    return score
Beispiel #4
0
def cv_ladders(configs, indexes, name):
    """Run nested cross-validation of ladder networks and score each fold.

    For every outer fold the candidate configuration(s) are validated on each
    nested split in parallel via ``validate_ladder``. The per-split scores are
    written to CSV, averaged per configuration, and the best one (lowest
    ``cost_matrix``, then highest ``SEN``) is retrained on the fold's training
    rows and evaluated on its test rows.

    Relies on the module-level names ``df``, ``y`` and ``N_CORES``.

    Args:
        configs: Sequence of config dicts; each needs the ``'x_features'`` and
            ``'binary'`` keys.
        indexes: Iterable of outer folds; each fold is indexed with
            ``'train'``, ``'test'`` and ``'nested_indexes'``, and every nested
            entry with ``'train'`` and ``'val'``.
        name: Experiment name used in run names and output paths.

    Returns:
        DataFrame with one row of test scores per outer fold; the same table
        is written to ``./results/ladder/<name>/<name>_all.csv``.
    """
    test_scores = []
    for idx, fold in enumerate(indexes):
        # NOTE(review): the ``configs[:1]`` slice means only the first config
        # is ever evaluated, so the ranking below has a single candidate.
        # Confirm whether this is intentional or a debugging leftover.
        scores = Parallel(n_jobs=N_CORES)(
            delayed(validate_ladder)(
                config, df, y,
                nested_fold['train'],
                nested_fold['val'],
                sub_name=name,
                name="ova_{}_{}_{}".format(name, idx, inner_idx),
            )
            for inner_idx, (config, nested_fold) in
            enumerate(product(configs[:1], fold['nested_indexes']))
        )
        df_scores = pd.DataFrame(scores)
        df_scores.to_csv(
            "./results/ladder/{}/fold_{}_{}_scores.csv".format(name, name, idx),
            index=False
        )
        sorted_configs = df_scores.groupby('config').mean().sort_values(
            ['cost_matrix', 'SEN'], ascending=[True, False]
        )
        # The group key is the serialized config string; parse it back into
        # a dict before retraining.
        _config = yaml.safe_load(sorted_configs.index[0])
        # Builtin ``float`` is what the ``np.float`` alias pointed to; the
        # alias itself no longer exists in recent NumPy releases.
        X = df[_config.pop('x_features')].values.astype(float)

        _y = y.copy()
        if _config.pop('binary'):
            _y = binarize_y(y)

        # Retrain with the winning config; the outer test rows are passed in
        # the 'val_indexes' slot.
        res, _ = train_ladder(
            _config,
            dataset={
                'ovadataset': OvaDataset(X, _y),
                'train_indexes': fold['train'],
                'val_indexes': fold['test'],
            },
            save_to='ladder/{}/ova_{}_{}'.format(name, name, idx)
        )
        binarized_y_true = binarize_y(y[fold['test']])
        binarized_y_pred = binarize_y(res.argmax(axis=1))
        test_scores.append(
            create_score_dict(binarized_y_true, binarized_y_pred)
        )
    results = pd.DataFrame(test_scores)
    results.to_csv("./results/ladder/{}/{}_all.csv".format(name, name), index=False)
    return results
Beispiel #5
0
def cv_old_models(df, indexes, y_bin):
    """Score the precomputed predictions of the reference models per fold.

    Args:
        df: Table holding one column of predictions per model name listed
            below; each column's ``.values`` is cast to integers.
        indexes: Iterable of folds; only ``fold['test']`` is used. It is
            traversed once per model, so it must be re-iterable.
        y_bin: Ground-truth labels, indexed with ``fold['test']``.

    Returns:
        Dict mapping each model name to a DataFrame with one row of
        ``create_score_dict`` results per fold.
    """
    models = [
        'TimmermannBin',
        'LR1Bin',
        'LR2Bin',
        'SMBin',
        'AdnexBin',
    ]
    models_dict = {}
    for model in models:
        # Convert once per model instead of once per fold. Builtin ``int`` is
        # what the ``np.int`` alias pointed to; the alias itself no longer
        # exists in recent NumPy releases.
        predictions = df[model].values.astype(int)
        test_scores = [
            create_score_dict(y_bin[fold['test']], predictions[fold['test']])
            for fold in indexes
        ]
        models_dict[model] = pd.DataFrame(test_scores)
    return models_dict