def test_calibration(X, y, cal_method='sigmoid'):
    """Fit a calibrated XGBClassifier under windowed CV and print its scores.

    An ``XGBClassifier`` (objective ``binary:logitraw``) is wrapped in a
    ``CalibratedClassifierCV`` using ``cal_method``, which in turn is wrapped
    in a ``ClassifierCV``. After fitting on ``X``/``y`` two lines are printed,
    labelled "accuracy" (``score_cv()``) and "logloss"
    (``score_cv(skm.log_loss)``).

    Args:
        X: Feature data passed straight to ``ClassifierCV.fit``.
        y: Target data passed straight to ``ClassifierCV.fit``.
        cal_method: Calibration method name forwarded to
            ``CalibratedClassifierCV`` and used as the printed prefix.
    """
    # Settings shared by both splitters; only the sliding window size differs.
    window = dict(test_size=120, step_size=120, initial_test_index=-480)

    raw_model = XGBClassifier(objective='binary:logitraw')
    outer_split = WindowSplit(sliding_size=5480, **window)
    inner_split = WindowSplit(sliding_size=5000, **window)

    calibrated = CalibratedClassifierCV(raw_model, method=cal_method, cv=inner_split)
    evaluator = ClassifierCV(calibrated, cv=outer_split)
    evaluator.fit(X, y)

    accuracy = evaluator.score_cv()
    print(cal_method + ' Calibrated accuracy: %s' % accuracy)
    logloss = evaluator.score_cv(skm.log_loss)
    print(cal_method + ' Calibrated logloss: %s' % logloss)
def test_threshold(X, y, thr_method='youden'):
    """Fit a threshold-tuned XGBClassifier under windowed CV and print results.

    An ``XGBClassifier`` (objective ``binary:logistic``) is wrapped in a
    ``ThresholdClassifierCV`` using ``thr_method``, which is wrapped in a
    ``ClassifierCV``. After fitting, the mean of the ``threshold`` attribute
    over ``classifiers_`` is printed, followed by the lines labelled
    "accuracy" and "logloss".

    Args:
        X: Feature data passed straight to ``ClassifierCV.fit``.
        y: Target data passed straight to ``ClassifierCV.fit``.
        thr_method: Threshold method name forwarded to
            ``ThresholdClassifierCV`` and used as the printed prefix.
    """
    # Settings shared by both splitters; only the sliding window size differs.
    window = dict(test_size=120, step_size=120, initial_test_index=-480)

    model = XGBClassifier(objective='binary:logistic')
    outer_split = WindowSplit(sliding_size=5480, **window)
    inner_split = WindowSplit(sliding_size=5000, **window)

    thresholded = ThresholdClassifierCV(model, method=thr_method, cv=inner_split)
    evaluator = ClassifierCV(thresholded, cv=outer_split)
    evaluator.fit(X, y)

    # One threshold per fitted classifier; report their arithmetic mean.
    thresholds = [fitted.threshold for fitted in evaluator.classifiers_]
    mean_threshold = sum(thresholds) / len(thresholds)
    print(thr_method + ' Threshold: %s' % mean_threshold)

    accuracy = evaluator.score_cv()
    print(thr_method + ' Threshold accuracy: %s' % accuracy)
    logloss = evaluator.score_cv(skm.log_loss)
    print(thr_method + ' Threshold logloss: %s' % logloss)
def test_calibration_threshold(X, y, cal_method='sigmoid', thr_method='youden'):
    """Chain calibration and threshold tuning under windowed CV; print results.

    Wrapping order, innermost first: ``XGBClassifier`` (objective
    ``binary:logitraw``) -> ``CalibratedClassifierCV`` ->
    ``ThresholdClassifierCV`` -> ``ClassifierCV``. Each wrapper gets its own
    ``WindowSplit`` whose sliding window grows by 480 per level
    (5000, 5480, 5960).

    Args:
        X: Feature data passed straight to ``ClassifierCV.fit``.
        y: Target data passed straight to ``ClassifierCV.fit``.
        cal_method: Method name forwarded to ``CalibratedClassifierCV``.
        thr_method: Method name forwarded to ``ThresholdClassifierCV``.
    """
    window = dict(test_size=120, step_size=120, initial_test_index=-480)
    calibration_split = WindowSplit(sliding_size=5000, **window)
    threshold_split = WindowSplit(sliding_size=5480, **window)
    outer_split = WindowSplit(sliding_size=5960, **window)

    raw_model = XGBClassifier(objective='binary:logitraw')
    calibrated = CalibratedClassifierCV(raw_model, method=cal_method, cv=calibration_split)
    thresholded = ThresholdClassifierCV(calibrated, method=thr_method, cv=threshold_split)
    evaluator = ClassifierCV(thresholded, cv=outer_split)
    evaluator.fit(X, y)

    # Every printed line starts with "<cal_method> <thr_method>".
    prefix = cal_method + ' ' + thr_method

    thresholds = [fitted.threshold for fitted in evaluator.classifiers_]
    mean_threshold = sum(thresholds) / len(thresholds)
    print(prefix + ' Threshold: %s' % mean_threshold)

    accuracy = evaluator.score_cv()
    print(prefix + ' Threshold accuracy: %s' % accuracy)
    logloss = evaluator.score_cv(skm.log_loss)
    print(prefix + ' Threshold logloss: %s' % logloss)
def test_cutoff(X, y):
    """Fit a cutoff-tuned XGBClassifier under windowed CV and print results.

    An ``XGBClassifier`` (objective ``binary:logistic``) is wrapped in a
    ``CutoffClassifierCV`` and then in a ``ClassifierCV``. After fitting, the
    means of ``cutoff[0]`` and ``cutoff[1]`` over ``classifiers_`` are printed
    as a tuple, followed by the lines labelled "accuracy" and "logloss".

    Args:
        X: Feature data passed straight to ``ClassifierCV.fit``.
        y: Target data passed straight to ``ClassifierCV.fit``.
    """
    window = dict(test_size=120, step_size=120, initial_test_index=-480)

    model = XGBClassifier(objective='binary:logistic')
    outer_split = WindowSplit(sliding_size=5480, **window)
    inner_split = WindowSplit(sliding_size=5000, **window)

    cutoff_model = CutoffClassifierCV(model, cv=inner_split)
    evaluator = ClassifierCV(cutoff_model, cv=outer_split)
    evaluator.fit(X, y)

    # Index 0 is treated as the upper cutoff and index 1 as the lower one.
    pairs = [fitted.cutoff for fitted in evaluator.classifiers_]
    uppers = [pair[0] for pair in pairs]
    lowers = [pair[1] for pair in pairs]
    mean_upper = sum(uppers) / len(uppers)
    mean_lower = sum(lowers) / len(lowers)
    print('Cutoff: ' + str((mean_upper, mean_lower)))

    accuracy = evaluator.score_cv()
    print('Cutoff accuracy: %s' % accuracy)
    logloss = evaluator.score_cv(skm.log_loss)
    print('Cutoff logloss: %s' % logloss)
def test_calibration_cutoff(X, y, cal_method='sigmoid'):
    """Chain calibration and cutoff tuning under windowed CV; print results.

    Wrapping order, innermost first: ``XGBClassifier`` (objective
    ``binary:logitraw``) -> ``CalibratedClassifierCV`` ->
    ``CutoffClassifierCV`` -> ``ClassifierCV``. Each wrapper gets its own
    ``WindowSplit`` whose sliding window grows by 480 per level
    (5000, 5480, 5960).

    Args:
        X: Feature data passed straight to ``ClassifierCV.fit``.
        y: Target data passed straight to ``ClassifierCV.fit``.
        cal_method: Method name forwarded to ``CalibratedClassifierCV`` and
            used as the printed prefix.
    """
    window = dict(test_size=120, step_size=120, initial_test_index=-480)
    calibration_split = WindowSplit(sliding_size=5000, **window)
    cutoff_split = WindowSplit(sliding_size=5480, **window)
    outer_split = WindowSplit(sliding_size=5960, **window)

    raw_model = XGBClassifier(objective='binary:logitraw')
    calibrated = CalibratedClassifierCV(raw_model, method=cal_method, cv=calibration_split)
    cutoff_model = CutoffClassifierCV(calibrated, cv=cutoff_split)
    evaluator = ClassifierCV(cutoff_model, cv=outer_split)
    evaluator.fit(X, y)

    # Index 0 is treated as the upper cutoff and index 1 as the lower one.
    pairs = [fitted.cutoff for fitted in evaluator.classifiers_]
    uppers = [pair[0] for pair in pairs]
    lowers = [pair[1] for pair in pairs]
    mean_upper = sum(uppers) / len(uppers)
    mean_lower = sum(lowers) / len(lowers)
    print(cal_method + ' Cutoff: ' + str((mean_upper, mean_lower)))

    accuracy = evaluator.score_cv()
    print(cal_method + ' Cutoff accuracy: %s' % accuracy)
    logloss = evaluator.score_cv(skm.log_loss)
    print(cal_method + ' Cutoff logloss: %s' % logloss)
# Example #6
# 0
def do_classifier_transforms(base_clf,
                             cv_list,
                             cv_params,
                             base_only=False,
                             **kwargs):
    """Wrap a clone of ``base_clf`` in a stack of CV-based meta-classifiers.

    The transforms come from ``get_transforms(cv_params, base_only=base_only)``
    and are applied in order, each wrapping the result of the previous one.
    The wrapper is chosen by which marker the transform contains:

    * ``'calibration'`` -> ``CalibratedClassifierCV`` (uses ``transform['method']``)
    * ``'threshold'``   -> ``ThresholdClassifierCV`` (uses ``transform['method']``)
    * ``'cutoff'``      -> ``CutoffClassifierCV``
    * ``'master'``      -> ``ClassifierCV`` (receives ``**kwargs``)

    A transform containing none of these markers leaves the classifier
    unchanged.

    Args:
        base_clf: Estimator to clone and wrap.
        cv_list: Indexable of CV splitters; entry ``i`` is used for the
            ``i``-th transform.
        cv_params: Passed through to ``get_transforms``.
        base_only: Passed through to ``get_transforms``.
        **kwargs: Extra keyword arguments for the ``ClassifierCV`` wrapper.

    Returns:
        The outermost wrapped classifier, or the bare clone when no transform
        matched.
    """
    steps = get_transforms(cv_params, base_only=base_only)
    wrapped = clone(base_clf)

    for position, step in enumerate(steps):
        # cv_list is indexed only inside a matching branch, so unmatched
        # transforms never touch it.
        if 'calibration' in step:
            wrapped = CalibratedClassifierCV(base_estimator=wrapped,
                                             method=step['method'],
                                             cv=cv_list[position])
            continue
        if 'threshold' in step:
            wrapped = ThresholdClassifierCV(base_estimator=wrapped,
                                            method=step['method'],
                                            cv=cv_list[position])
            continue
        if 'cutoff' in step:
            wrapped = CutoffClassifierCV(base_estimator=wrapped,
                                         cv=cv_list[position])
            continue
        if 'master' in step:
            wrapped = ClassifierCV(base_estimator=wrapped,
                                   cv=cv_list[position],
                                   **kwargs)

    return wrapped
def init():
    """Run the baseline and every calibration/threshold/cutoff comparison.

    Builds a dataset with ``make_classification(7000)``, prints the label
    frequencies, fits and scores a plain ``XGBClassifier`` inside a
    ``ClassifierCV``, then calls each ``test_*`` helper for every method
    combination. Finally drops into ``pdb`` for interactive inspection.
    """
    X, y = make_classification(7000)

    # Map each distinct label in y to the number of times it occurs.
    labels, counts = np.unique(y, return_counts=True)
    freqs = dict(zip(labels.tolist(), counts.tolist()))
    print('Truth freqs: %s' % str(freqs))

    # Base CV
    cv = WindowSplit(test_size=120, step_size=120, sliding_size=1000, initial_test_index=-480)
    base_clf = XGBClassifier()
    cv_clf = ClassifierCV(base_clf, cv)
    cv_clf.fit(X, y)

    base_accuracy = cv_clf.score_cv()
    print('Base accuracy: %s' % base_accuracy)
    base_logloss = cv_clf.score_cv(skm.log_loss)
    print('Base logloss: %s' % base_logloss)

    cal_methods = ('sigmoid', 'isotonic', 'rocch', 'beta')
    thr_methods = ('youden', 'roc')

    for cal in cal_methods:
        test_calibration(X, y, cal_method=cal)

    for thr in thr_methods:
        test_threshold(X, y, thr_method=thr)

    test_cutoff(X, y)

    # Threshold method is the outer loop: all calibrations run for 'youden'
    # before any run for 'roc'.
    for thr in thr_methods:
        for cal in cal_methods:
            test_calibration_threshold(X, y, cal_method=cal, thr_method=thr)

    for cal in cal_methods:
        test_calibration_cutoff(X, y, cal_method=cal)

    # Leave a breakpoint so the fitted objects can be inspected interactively.
    import pdb
    pdb.set_trace()
    pass