# Example 1
# Example 1: evaluate a tuned GradientBoostingClassifier on the processed
# credit-scoring data via the Comparator test harness.
import pandas as pd  # fix: `pd` was used below but never imported in this section

from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier

from comparator import Comparator as Tester

# Show every column when DataFrames are printed for inspection.
pd.set_option('display.max_columns', None)


df = pd.read_csv(r"./data/processed_data.csv", engine="python")

# 'SeriousDlqin2yrs' is the target (delinquency) column in this dataset.
tester1 = Tester('SeriousDlqin2yrs')
tester1.addDataset('processed_data', df)

# Best GBDT configuration found so far; earlier RF/GBDT candidates and
# plotting scratch code removed as dead commented-out code.
tester1.addModel('2', GradientBoostingClassifier(n_estimators=200,
                                                 learning_rate=0.08,
                                                 subsample=0.85,
                                                 max_depth=5,
                                                 min_samples_leaf=550))
tester1.runTests()
# Example 2
# Example 2: benchmark one tuned RandomForest across multiple
# outlier-treatment variants of the credit dataset.
# The dataset variables (df, removed_debt_outliers, repalace_debt_ratio,
# repalace98, dfus, drop98, add_outliers, best_data) and Comparator are
# assumed to be defined earlier in the file — not visible in this chunk.
# NOTE(review): the "repalace*" names look like typos of "replace*"; they are
# defined elsewhere, so they are left unchanged here.
tester = Comparator('SeriousDlqin2yrs')

tester.addDataset('missing data processed', df)
tester.addDataset('debt ratio outliers removed',
                  removed_debt_outliers)  # 164 removed
tester.addDataset('debt ratio outliers replaced',
                  repalace_debt_ratio)  # 164 removed
tester.addDataset('overdue outliers replaced', repalace98)  #269 removed
tester.addDataset('utilization outliers removed', dfus)  # 241 removed
tester.addDataset('overdue outliers removed', drop98)
tester.addDataset('outliers added', add_outliers)
tester.addDataset('best_data', best_data)

# rf_default = RandomForestClassifier()
# dbdt_default = GradientBoostingClassifier()
# tester.addModel('default RF', rf_default)
# tester.addModel('default GBDT ', dbdt_default)

# Single tuned RF evaluated on every dataset variant registered above.
rf = RandomForestClassifier(n_estimators=32,
                            max_depth=8,
                            random_state=0,
                            max_features='auto',
                            oob_score=True)
# dbdt = GradientBoostingClassifier(n_estimators=250, subsample=0.8, min_samples_split=1000, learning_rate=0.06, max_depth=6 )
tester.addModel('RF', rf)
# tester.addModel('GBDT', dbdt)

# tester.addModel('Simple SVM', svm.LinearSVC())

# runTests() is unpacked as (test AUC, train AUC, elapsed time) — see Example 4.
test_auc, train_auc, time_spent = tester.runTests()
# Example 3
# Example 3: measure the impact of artificially injected DebtRatio outliers
# on GBDT performance by comparing clean vs. corrupted copies of the data.
import pandas as pd  # fix: `pd` was used below but never imported in this section
import numpy

# fix: GradientBoostingClassifier is used below but was not imported here
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier

from comparator import Comparator

df = pd.read_csv(r"./data/processed_data.csv", engine="python")

# Corrupt ~5% of rows with extreme DebtRatio values in [3000, 30000).
# NOTE(review): randint can repeat indices, so slightly fewer than 5% of
# distinct rows may be touched; the RNG is unseeded, so runs are not
# reproducible — confirm whether that is intended.
add_outliers = df.copy()
outlier_count = int(df.shape[0] * 0.05)
index = numpy.random.randint(0, df.shape[0], outlier_count)
# Reset to a 0..n-1 positional index so `.at[i, ...]` hits the sampled rows.
add_outliers.reset_index(drop=True, inplace=True)
for i in index:
    add_outliers.at[i, 'DebtRatio'] = numpy.random.randint(3000, 30000)

comparator = Comparator('SeriousDlqin2yrs')

comparator.addDataset('data', df)
comparator.addDataset('outliers added', add_outliers)

# Compare a tuned GBDT against sklearn defaults on both dataset variants.
comparator.addModel(
    'tuned GBDT',
    GradientBoostingClassifier(n_estimators=200,
                               learning_rate=0.05,
                               subsample=0.85,
                               max_depth=5,
                               min_samples_leaf=500))
comparator.addModel('default GBDT', GradientBoostingClassifier())

comparator.runTests()
# Example 4
# Example 4: sweep one RandomForest hyperparameter (min_samples_leaf here)
# while holding the rest fixed, then plot train vs. test AUC over the sweep.
# Assumes `paras` (dict), the candidate lists (n_estimators, max_depth,
# max_features, min_samples_split, min_samples_leaf), `tester1`, `plt`,
# `HandlerLine2D` and RandomForestClassifier are defined earlier in the
# file — not visible in this chunk.
paras["n_estimators"] = n_estimators
paras["max_depth"] = max_depth
paras["max_features"] = max_features
paras["min_samples_split"] = min_samples_split
paras["min_samples_leaf"] = min_samples_leaf

# Name of the parameter being swept; used to index `paras` and label the plot.
to_tuning = 'min_samples_leaf'

rfc = []
for i in range(0, len(paras[to_tuning])):
    # One model per candidate value; all other hyperparameters held fixed.
    rfc.append(
        RandomForestClassifier(n_estimators=100,
                               max_depth=16,
                               max_features='auto',
                               min_samples_leaf=int(min_samples_leaf[i])))
    tester1.addModel(i, rfc[i])

# Per-model test AUC, train AUC and elapsed time, aligned with the sweep.
test_auc, train_auc, time_spent = tester1.runTests()

# plt.subplot(121)
line1, = plt.plot(paras[to_tuning], train_auc, 'b', label='Train AUC')
line2, = plt.plot(paras[to_tuning], test_auc, 'r', label='Test AUC')
plt.legend(handler_map={line1: HandlerLine2D(numpoints=2)})
plt.ylabel('AUC score')
plt.xlabel(to_tuning)
plt.ylim([0.85, 0.9])

# plt.subplot(122)
# line3, = plt.plot(paras[to_tuning], time_spent)
# plt.ylabel('time spent')
# plt.xlabel(to_tuning)
# Example 5
# Example 5: grid-search learning_rate x n_estimators for a GBDT, drawing one
# subplot of train/test AUC curves per learning-rate value.
# Relies on names defined earlier in the file: learning_rate, n_estimators,
# paras, pd, plt, HandlerLine2D, Comparator, GradientBoostingClassifier.

# fix: plt.subplot requires integer row/col counts; the original passed
# `len(learning_rate) / 2` (a float under true division). Ceiling division
# also keeps an odd number of learning rates from overflowing the grid.
n_rows = (len(learning_rate) + 1) // 2

for j in range(0, len(learning_rate)):
    models = []  # renamed from `rfc`: these are gradient-boosting models
    comparator = Comparator('SeriousDlqin2yrs')
    # Reload the data fresh for every learning-rate setting so each run
    # starts from an identical dataset.
    df = pd.read_csv(r"./data/processed_data.csv", engine="python")
    comparator.addDataset('processed_data', df)

    to_tuning = 'n_estimators'

    # One model per n_estimators candidate at the current learning rate.
    for i in range(0, len(paras[to_tuning])):
        models.append(
            GradientBoostingClassifier(n_estimators=n_estimators[i],
                                       learning_rate=learning_rate[j],
                                       subsample=0.85,
                                       max_depth=5,
                                       min_samples_leaf=550))
        comparator.addModel(i, models[i])
    test_auc, train_auc, time_spent = comparator.runTests()

    # Two-column grid; one panel per learning rate.
    plt.subplot(n_rows, 2, j + 1)
    plt.title("learning_rate=" + str(learning_rate[j]))
    line1, = plt.plot(paras[to_tuning], train_auc, 'b', label='Train AUC')
    line2, = plt.plot(paras[to_tuning], test_auc, 'r', label='Test AUC')
    plt.legend(handler_map={line1: HandlerLine2D(numpoints=2)})
    plt.ylabel('AUC score')
    plt.xlabel(to_tuning)

    plt.ylim([0.85, 0.875])
plt.show()