Beispiel #1
0
    def __init__(self, calibrationTable, score_func=accuracy_score,
                 saveToDir='/home/jj/code/Kaggle/allstate/output/gridSearchOutput'):
        """
        Calibrate the imputer by picking the best-scoring candidate pipeline.

        The calibration table is passed through impute_risk_factors() to get
        the calibration inputs and targets. Every (pipe, params) entry from
        make_pipes() is then searched with fitClfWithGridSearch(), and the
        entry with the highest reported score wins. A clone of the winning
        pipe, configured with its best parameters, is stored in
        self._imputer; this method does not call fit() on that clone.
        If no candidate reports a score that compares greater than -inf
        (e.g. make_pipes() is empty), self._imputer is left as None.

        @param calibrationTable: a pandas data frame
        @param score_func: scoring callable forwarded to fitClfWithGridSearch;
            larger values are treated as better (minimize=False is passed)
        @param saveToDir: directory forwarded to fitClfWithGridSearch as its
            saveToDir argument; defaults to the previously hard-coded path
        """

        # Single-argument print(...) produces the same output under the
        # Python 2 print statement and the Python 3 print function.
        print('--------- Calibrating Imputer -----------')
        X_cal, y_cal, _, _ = impute_risk_factors(calibrationTable)

        # Start below every possible score: the former sentinel of -1 threw
        # away all candidates whenever score_func returned values <= -1.
        bestScore = float('-inf')
        bestPipe = None
        bestParams = None

        for name, (pipe, params) in make_pipes().items():
            print('%s %s %s' % ('>' * 10, name, '<' * 10))

            # Only the best parameters and best score of the search are used;
            # the first returned value is ignored.
            _, cur_bestParams, cur_bestScore = fitClfWithGridSearch(
                name + '_risk', pipe, params, DatasetPair(np.array(X_cal), y_cal),
                saveToDir=saveToDir,
                useJJ=True, score_func=score_func, n_jobs=N_JOBS, verbosity=0,
                minimize=False, cvSplitNum=5,
                maxLearningSteps=10,
                numConvergenceSteps=4, convergenceTolerance=0, eliteProportion=0.1,
                parentsProportion=0.4, mutationProportion=0.1, mutationProbability=0.1,
                mutationStdDev=None, populationSize=6)

            # Strict '>' keeps the first of several equally scored candidates.
            if cur_bestScore > bestScore:
                bestScore = cur_bestScore
                # Work on a clone so the pipe object from make_pipes() is not
                # the one that receives the winning parameters.
                bestPipe = clone(pipe)
                bestPipe.set_params(**cur_bestParams)
                bestParams = cur_bestParams

        print('----> best score: %s' % (bestScore,))
        pprint(bestParams)

        self._imputer = bestPipe
Beispiel #2
0
# plot_feature_importances(X_train, outputTable, inputTable.columns)

# Per-column model search: for each column of outputTable_cal, run every
# candidate pipeline from make_pipes() through fitClfWithGridSearch and
# track the highest score reported for that column.
print '----------- individual accuracy score'

indivClfs = []

for col in outputTable_cal.columns:
    print '>'*20, col, '<'*20
    # Target vector for this column, converted to a numpy array.
    cur_y = np.array(outputTable_cal[col])

    # Best-so-far trackers, reset for every column.
    # NOTE(review): -1 only works as a "worse than anything" sentinel because
    # accuracy_score is used below -- presumably its range is [0, 1]; confirm.
    bestScore = -1
    bestPipe = None
    bestParams = None

    for name, (pipe, params) in make_pipes().iteritems():
        print '>'*10, name, '<'*10

        # The search is labelled "<pipe name>_<column>_<calibrationName>".
        # Only the best parameters and best score are kept; the first
        # returned value is ignored.
        _, cur_bestParams, cur_bestScore = fitClfWithGridSearch(
            '_'.join([name, col, calibrationName]), pipe, params, DatasetPair(X_cal, cur_y),
            saveToDir='/home/jj/code/Kaggle/allstate/output/gridSearchOutput',
            useJJ=True, score_func=accuracy_score, n_jobs=N_JOBS, verbosity=0,
            minimize=False, cvSplitNum=5,
            maxLearningSteps=10,
            numConvergenceSteps=4, convergenceTolerance=0, eliteProportion=0.1,
            parentsProportion=0.4, mutationProportion=0.1, mutationProbability=0.1,
            mutationStdDev=None, populationSize=6)

        # Strict '>' means the first of several equally scored candidates wins.
        if cur_bestScore > bestScore:

            bestScore = cur_bestScore