# Example #1
def test_HistGradientBoostingClassifier_multidim():
    """TreeExplainer additivity check on a multiclass HistGradientBoostingClassifier.

    Verifies that in "raw" (decision-function) output space the per-sample
    SHAP values plus the expected value reconstruct the model's raw score
    for class 0 to within numerical tolerance.
    """
    # Imported for its side effect only: on scikit-learn < 1.0 this unlocks
    # the experimental HistGradientBoosting estimators.
    from sklearn.experimental import enable_hist_gradient_boosting  # noqa: F401
    from sklearn.ensemble import HistGradientBoostingClassifier

    # Small slice of the adult dataset keeps the test fast; the real labels
    # are replaced with 3 random classes to exercise the multiclass
    # (multidimensional output) code path.
    X, y = shap.datasets.adult()
    X = X[:100]
    y = y[:100]
    # Fixed seed: the original unseeded randint made the test data — and
    # hence the asserted tolerance — nondeterministic across runs.
    rng = np.random.RandomState(0)
    y = rng.randint(0, 3, len(y))

    model = HistGradientBoostingClassifier(max_iter=10, max_depth=6).fit(X, y)
    explainer = shap.TreeExplainer(model, shap.sample(X, 10), model_output="raw")
    shap_values = explainer.shap_values(X)

    # Additivity for class 0: sum of SHAP values per sample + expected value
    # must match the model's raw decision function for that class.
    assert np.max(np.abs(shap_values[0].sum(1) + explainer.expected_value[0] - model.decision_function(X)[:,0])) < 1e-4
# Example #2
# Subtask 1: squash the multi-label decision-function scores through a
# sigmoid to get per-label probabilities, then write them out.
scores = multi_clf.decision_function(patient_test_data)
res = 1 / (1 + np.exp(-scores))

task1_df = pd.DataFrame(res, columns=train_labels.columns[1:11])
task1_df.to_csv('subtask1.csv', index=False, header=True)

t2 = time.time()
print('subtask1, time taken: ', t2 - t1)
print(task1_df)

#subtask 2
# Fit a single-label classifier on column 11 of the labels and emit
# sigmoid-transformed decision scores as probabilities.
t1 = time.time()

target_col = train_labels.to_numpy()[:, 11]
clf.fit(patient_train_data, target_col)

raw_scores = clf.decision_function(patient_test_data)
res = 1 / (1 + np.exp(-raw_scores))

task2_df = pd.DataFrame(res, columns=[train_labels.columns[11]])
task2_df.to_csv('subtask2.csv', index=False, header=True)

t2 = time.time()
print('subtask2, time taken: ', t2 - t1)
print(task2_df)

#subtask 3
t1 = time.time()
# Gradient-boosted regressor for the regression targets; fixed random_state
# for reproducible fits.
reg = HistGradientBoostingRegressor(random_state=1510)

# One prediction column per target; presumably filled per label column
# 12..15 inside the loop — the loop body continues beyond this excerpt.
res = np.empty(shape=(len(patient_test_data), 4))
for feat in range(12, 16):