コード例 #1
0
ファイル: m32_joblib.py プロジェクト: songjeonghyun11/Bit
# Example 1: train an XGBoost random-forest classifier, persist it with
# joblib, then reload it and verify the persisted model still predicts.
x_train, x_test, y_train, y_test = train_test_split(x, y, train_size=0.8)

model = XGBRFClassifier(n_estimators=1000, learning_rate=0.1)

# eval_metric options: rmse, mae, logloss, error, auc
model.fit(x_train,
          y_train,
          verbose=True,
          eval_metric="error",
          eval_set=[(x_train, y_train), (x_test, y_test)])

results = model.evals_result()
print("eval:", results)

y_pred = model.predict(x_test)
acc = accuracy_score(y_test, y_pred)
print("acc:", acc)

# pickle would also work:
# import pickle
# pickle.dump(model, open("./model/sample/xgb_save/cancer.pickle.dat", "wb"))

import joblib

joblib.dump(model, "./model/sample/xgb_save/cancer.joblib.dat")
print("저장됨.")

model2 = joblib.load("./model/sample/xgb_save/cancer.joblib.dat")
print("불러오깅")

# BUG FIX: evaluate the RELOADED model (model2), not the in-memory one —
# otherwise the joblib round-trip is never actually exercised.
y_pred = model2.predict(x_test)
acc = accuracy_score(y_test, y_pred)
コード例 #2
0
# Example 2: fit XGBRFClassifier with two eval metrics, then iterate over
# sorted feature-importance values as SelectFromModel thresholds.
model = XGBRFClassifier(
    n_estimators=300,  # number of verbose lines emitted; comparable to epochs
    learning_rate=0.1)

model.fit(x_train,
          y_train,
          verbose=True,
          eval_metric=['error', 'auc'],
          eval_set=[(x_train, y_train), (x_test, y_test)])
#   early_stopping_rounds = 100)
# eval_metric options: rmse, mae, logloss, error (error 0.2 means accuracy
# 0.8), auc (area under ROC curve; a companion measure to accuracy)

results = model.evals_result()
print("eval's result : ", results)

y_pred = model.predict(x_test)

acc = accuracy_score(y_test, y_pred)
# print("r2 Score : %.2f%%" %(r2 * 100))
print("acc : ", acc)

# Ascending feature importances; each value becomes a selection threshold.
thresholds = np.sort(model.feature_importances_)
import pickle
print(thresholds)

for thresh in thresholds:  # drop the unimportant columns one at a time
    # prefit=True reuses the already-fitted model instead of refitting.
    selection = SelectFromModel(model, threshold=thresh, prefit=True)

    selection_x_train = selection.transform(x_train)
    selection_x_test = selection.transform(x_test)
コード例 #3
0
    # (tail of for_model_input; def line is outside this excerpt)
    # Builds (X, y) from DataFrame `d`; the test set has no 'Survived'
    # column, so y is None for test=True.
    if test:
        y = None
        X = d.values
    else:
        # ravel to a flat 1-D label vector as sklearn expects
        y = np.ravel(d[['Survived']].values)
        X = d.drop(columns=['Survived']).values
    # standardize features column-wise (zero mean, unit variance)
    X = preprocessing.scale(X)
    return (X, y)


# Impute, fit, cross-validate, and write test-set predictions to CSV.
(Xtrain, ytrain) = for_model_input(trainset)
# Fit the imputer on training data only.
knn_imputer = KNNImputer()
Xtrain = knn_imputer.fit_transform(Xtrain)

boosted_model = XGBRFClassifier()
boosted_model.fit(Xtrain, ytrain)
# 5-fold CV (cross_val_score refits fresh clones internally).
boosted_scores = cross_val_score(boosted_model, Xtrain, ytrain, cv=5)

print("Gradient-Boosting Model CV scores:\n", boosted_scores,
      np.mean(boosted_scores))

(Xtest, _) = for_model_input(testset, test=True)
# BUG FIX: apply the training-set imputer with transform(); refitting on the
# test set (fit_transform) leaks test-set statistics into the imputation.
Xtest = knn_imputer.transform(Xtest)
predictions_boosted = boosted_model.predict(Xtest)
predictions_boosted = predictions_boosted.astype('int64')
pred_boosted_df = pandas.DataFrame(predictions_boosted, columns=['Survived'])
fin_ans_boosted = pandas.DataFrame(
    testset['PassengerId']).join(pred_boosted_df)
with open('predictions_xgboost_rf.csv', 'w') as f:
    f.write(fin_ans_boosted.to_csv(index=False))
コード例 #4
0
# Standardize the features, split into train/test, fit an XGBoost
# random-forest classifier, and report held-out accuracy.
from sklearn.preprocessing import StandardScaler

sc = StandardScaler()

print(f"\nBefore Standard Scaler, x.head() :- \n{ x.head() }")
x = sc.fit_transform(x)  # returns a plain ndarray (loses DataFrame labels)
print(f"\nAfter Standard Scaler, x :- \n{ x }")

from sklearn.model_selection import train_test_split

# 80/20 split; no fixed random_state, so the split varies run to run.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)

from xgboost import XGBRFClassifier

xgboost = XGBRFClassifier()
# fit() returns the estimator itself, so training and prediction chain.
y_pred = xgboost.fit(x_train, y_train).predict(x_test)

print(
    f"xgboost.score( x_test, y_test ) = { xgboost.score( x_test, y_test ) * 100 }%"
)

import matplotlib.pyplot as plt

# Scatter the actual labels against the (scaled) features — linestyle=''
# suppresses connecting lines so only the '*' markers are drawn.
plt.plot(x_test,
         y_test,
         label='Actual',
         marker='*',
         color='blue',
         linestyle='')
# Companion plot of the predictions (call is truncated in this excerpt).
plt.plot(x_test,
         y_pred,
コード例 #5
0
#
# # downsample majority
# neg_downsampled = resample(negative,
#                            replace=True,  # sample with replacement
#                            n_samples=len(positive),  # match number in minority class
#                            random_state=27)  # reproducible results
# # combine minority and downsampled majority
# downsampled = pd.concat([positive, neg_downsampled]).dropna()
# check new class counts
#
# X_train = pd.DataFrame(downsampled.drop(columns="target"), index=downsampled.index)
# y_train = pd.Series(downsampled["target"], index=downsampled.index)

# Fit the classifier and report validation metrics.
my_model = XGBRFClassifier(random_state=1).fit(X_train, y_train)

predictions = my_model.predict(X_val)

# BUG FIX: sklearn metrics take (y_true, y_pred). Passing predictions first
# silently swaps the reported precision and recall (matthews_corrcoef is
# symmetric, but the order is fixed here too for consistency).
print("Matthews Correlation Coefficient: " +
      str(matthews_corrcoef(y_val, predictions)))
print("Precision Score: " + str(precision_score(y_val, predictions)))
print("Recall Score: " + str(recall_score(y_val, predictions)))
ROC_curve(y_val, predictions)

# Column positions previously identified as the 20 most important features;
# defined once so train and validation stay in sync.
_TOP_FEATURE_COLS = [
    173, 141, 530, 683, 661, 498, 48, 183, 206, 716, 697, 185, 211, 624, 671,
    623, 67, 111, 118, 129
]
X_train_filtered = pd.DataFrame(X_train).iloc[:, _TOP_FEATURE_COLS]
X_val_filtered = pd.DataFrame(X_val).iloc[:, _TOP_FEATURE_COLS]