# Horizontally stack the transformed numeric and categorical feature blocks
# into one feature matrix for the classifier.
# NOTE(review): presumably both are 2-D with one row per sample -- confirm
# against the code that builds X_num_trans / X_cat_trans.
X_trans = np.hstack((X_num_trans, X_cat_trans))

    # Fit a 1000-tree random forest on the transformed features, passing w as
    # per-sample weights, then persist the fitted model to models/rf.pkl.
    print 'Training model... '
    #rf = RandomForestClassifier(n_estimators=10)
    #rf = RandomForestClassifier(n_estimators=100)
    rf = RandomForestClassifier(n_estimators=1000)
    rf.fit(X_trans, y, sample_weight=w)
    joblib.dump(rf, 'models/rf.pkl')
    print

    # Bundle the preprocessing objects with the fitted model so predictions
    # can be requested from the untransformed X_num / X_cat inputs.
    # NOTE(review): presumably ModelEvaluator.predict re-applies the imputer,
    # scaler and encoder before calling the model -- verify in ModelEvaluator.
    print 'Predicting in sample... '
    evaluator = ModelEvaluator(
        imputer=imp, scaler=scaler,
        encoder=enc, model=rf
    )
    y_pred = evaluator.predict(X_num, X_cat)

    # Weighted error: total weight of the misclassified rows divided by the
    # total weight of all rows.
    print 'Training Error = {0}'.format(np.sum(w[y != y_pred]) / np.sum(w))
    # Number of rows predicted as class 1 and as class 0, respectively.
    print 'Predicted +, - counts = {0}, {1}'.format(y_pred[y_pred==1].shape[0], y_pred[y_pred==0].shape[0])
    print

    # Load the validation file. This rebinds X_num, X_cat, w and y, so the
    # training-set values are no longer reachable under these names.
    print 'Importing test sample... '
    adapter = LearningDataAdapter(for_learning=True)
    adapter.adapt_file('data/validate.csv')
    X_num, X_cat = adapter.X_num, adapter.X_cat
    w, y = adapter.w, adapter.y
    print

    # Same weighted error as above, now computed on the validation data.
    print 'Predicting out of sample... '
    y_pred = evaluator.predict(X_num, X_cat)
    print 'Testing Error = {0}'.format(np.sum(w[y != y_pred]) / np.sum(w))
예제 #2
0
    # Fit a 1000-tree random forest on the transformed features and persist it
    # to models/rf.pkl. Unlike the commented-out call below, this fit does not
    # pass sample weights, so every row counts equally.
    print 'Training model... '
    #rf = RandomForestClassifier(n_estimators=10)
    #rf = RandomForestClassifier(n_estimators=100)
    #rf = RandomForestClassifier(n_estimators=200)
    rf = RandomForestClassifier(n_estimators=1000)
    #rf.fit(X_trans, y, sample_weight=w)
    rf.fit(X_trans, y)
    joblib.dump(rf, 'models/rf.pkl')
    print

    # Bundle the preprocessing objects with the fitted model so predictions
    # can be requested from the untransformed X_num / X_cat inputs.
    # NOTE(review): presumably ModelEvaluator.predict re-applies the imputer,
    # scaler and encoder before calling the model -- verify in ModelEvaluator.
    print 'Predicting in sample... '
    evaluator = ModelEvaluator(imputer=imp,
                               scaler=scaler,
                               encoder=enc,
                               model=rf)
    y_pred = evaluator.predict(X_num, X_cat)

    # Unweighted error: fraction of rows whose prediction differs from y.
    # float() forces true division; with two ints Python 2's `/` would floor.
    #print 'Training Error = {0}'.format(np.sum(w[y != y_pred]) / np.sum(w))
    print 'Training Error = {0}'.format(y_pred[y != y_pred].shape[0] /
                                        float(y_pred.shape[0]))
    # Number of rows predicted as class 1 and as class 0, respectively.
    print 'Predicted +, - counts = {0}, {1}'.format(
        y_pred[y_pred == 1].shape[0], y_pred[y_pred == 0].shape[0])
    print

    # Load the validation file. This rebinds X_num, X_cat, w and y, so the
    # training-set values are no longer reachable under these names.
    # NOTE(review): w is loaded here although the visible code in this
    # fragment never uses weights -- check whether later code needs it.
    print 'Importing test sample... '
    adapter = LearningDataAdapter(for_learning=True)
    adapter.adapt_file('data/validate.csv')
    X_num, X_cat = adapter.X_num, adapter.X_cat
    w, y = adapter.w, adapter.y
    print