Example #1
    # dashed diagonal: reference line for perfect calibration
    ax3.plot([0, 1], [0, 1], color='gray', linestyle='--')
    ax3.set_xlabel('Mean predicted value')
    ax3.set_ylabel('Fraction of positives')

    fig.tight_layout(pad=0)
    if outputfile:
        fig.savefig(outputfile)
    else:
        plt.show()


if __name__ == '__main__':

    df = read_data('signal.csv', 'background.csv')

    df = drop_useless(df)

    print(80*'=')
    print('{:^80}'.format('GaussianNB'))
    print(80*'=')
    gnb = GaussianNB()
    # drop columns that contain NaNs, then split label from features
    df_nb_label = df.dropna(axis=1)['label']
    df_nb = df.dropna(axis=1).drop('label', axis=1)
    print('{} remaining features after dropping columns with NaNs.'.format(
        len(df_nb.columns)
    ))

    nb_aucs = classifier_crossval_performance(
        df_nb.values, df_nb_label.values, classifier=gnb
    )
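
This fragment relies on helpers defined elsewhere in the file (read_data, drop_useless, classifier_crossval_performance), and the ax3 block at the top is the tail of a reliability diagram: the dashed diagonal marks perfect calibration. Below is a self-contained sketch of how such a curve is typically produced with scikit-learn's calibration_curve; the synthetic data and all variable names are assumptions, not taken from the excerpt.

import matplotlib.pyplot as plt
from sklearn.calibration import calibration_curve
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB

# synthetic stand-in for the signal/background data used in the excerpt
X, y = make_classification(n_samples=2000, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# predicted signal probabilities from a fitted classifier
probas = GaussianNB().fit(X_train, y_train).predict_proba(X_test)[:, 1]

# bin the probabilities and compare each bin's mean prediction with the
# empirical fraction of positives it contains
frac_pos, mean_pred = calibration_curve(y_test, probas, n_bins=10)

fig, ax3 = plt.subplots()
ax3.plot(mean_pred, frac_pos, marker='o')
ax3.plot([0, 1], [0, 1], color='gray', linestyle='--')  # perfect calibration
ax3.set_xlabel('Mean predicted value')
ax3.set_ylabel('Fraction of positives')
plt.show()
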
Example #2
    # StratifiedKFold's old signature (scikit-learn < 0.18) took (y, n_folds=...)
    # and was directly iterable; with current scikit-learn, pass n_splits and
    # iterate over .split(X, y) instead
    cval = StratifiedKFold(n_splits=n_folds, shuffle=True).split(X, y)

    # evaluate the folds in parallel with joblib
    with Parallel(n_jobs=n_jobs) as pool:
        performances = pool(
            delayed(evaluate)(X, y, train, test)
            for train, test in cval
        )

    # pd.Panel was removed in pandas 1.0; concatenating the per-fold frames
    # into a DataFrame indexed by fold number carries the same information
    performances = pd.concat(dict(enumerate(performances)))

    return performances


if __name__ == '__main__':
    data = drop_useless(read_data('./signal.csv', './background.csv'))

    nb = GaussianNB()
    classifiers = {
        'RandomForest': RandomForestClassifier(
            n_estimators=100, criterion='entropy', n_jobs=2,
        ),
        # 'ExtraTrees': ExtraTreesClassifier(
        #     n_estimators=100, criterion='entropy', n_jobs=-1
        # ),
        # gradient boosting with exponential loss recovers the AdaBoost
        # algorithm, hence the key name
        'AdaBoost': GradientBoostingClassifier(
            n_estimators=100, loss='exponential',
        ),
        'NaiveBayes': nb,
    }
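
The excerpt ends at the dict. A plausible continuation, sketched below under the assumption that the unnamed cross-validation function above is the classifier_crossval_performance called in Example #1, would evaluate each entry in turn; this driver loop is not part of the original file.

    # hypothetical driver loop, not shown in the excerpt
    # drop NaN columns as in Example #1, since e.g. RandomForest cannot
    # handle missing values
    clean = data.dropna(axis=1)
    X = clean.drop('label', axis=1).values
    y = clean['label'].values

    results = {}
    for name, clf in sorted(classifiers.items()):
        print(80 * '=')
        print('{:^80}'.format(name))
        print(80 * '=')
        results[name] = classifier_crossval_performance(X, y, classifier=clf)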