예제 #1
0
    # Build binary label column vectors for train/test. Assumes the feature
    # matrices were stacked positives-first (rows [0, n_pos) are the positive
    # class) — TODO confirm against the unseen code that built X_train/X_test.
    y_test = np.zeros((X_test.shape[0],1))
    y_train = np.zeros((X_train.shape[0],1))
    # Mark the leading positive rows with label 1; the rest stay 0 (negatives).
    y_train[range(X_train_pos.shape[0])]=1
    y_test[range(X_test_pos.shape[0])]=1
    print("X size: ",X_train.shape[0],'x',X_train.shape[1])
    print("y size: ",y_train.shape[0],'x',y_train.shape[1])
    print("X-test size: ",X_test.shape[0],'x',X_test.shape[1])
    print("y-test size: ",y_test.shape[0],'x',y_test.shape[1])

    # train and test, performance output    
    #clf = tune_ebm(X_train, y_train)
    # NOTE(review): y_train is passed as an (n, 1) column vector here; other
    # fragments in this file call .ravel() first — sklearn-style estimators
    # typically warn on 2-D y. Confirm this is intentional.
    clf = ExplainableBoostingClassifier(random_state=seed, interactions=100)
    clf.fit(X_train, y_train)
    print("Finished training ...")
    # curr_perf collects [accuracy, AUPRC, AUROC] in that order.
    curr_perf = []
    y_pred = clf.predict(X_test)
    curr_perf += [metrics.accuracy_score(y_test, y_pred)]
    print(metrics.confusion_matrix(y_test, y_pred))
    # Rebind y_pred to class probabilities; column 1 is P(positive).
    y_pred = clf.predict_proba(X_test)
    curr_perf += [get_aucpr(y_test, y_pred[:,1])]
    curr_perf += [get_auc(y_test, y_pred[:,1])]
    print("Performance: ",curr_perf)

    # predict on larger set, output predictions
    print("Predicting on all test pairs now... ")
    # Score the full candidate negative set and persist scores alongside the
    # pair metadata frame (neg_pps is presumably a pandas DataFrame — verify).
    scores = (clf.predict_proba(X_neg_all))[:,1]
    neg_pps['score'] = scores   
    neg_pps.to_csv(outfile)
    
    # save model
    #save_model(clf,format("models/ebm_covonly_split%d_1to1_int.pkl" % split))
예제 #2
0
# Stratified 75/25 split over row indices iX (not the data itself), so the
# same index split can be applied to multiple aligned arrays below.
iX_train,  iX_test, y_train, y_test = \
    train_test_split(iX, y, test_size=0.25, stratify=y, random_state=0)

# Materialize the actual feature rows from the index split.
X_train, X_test = X[iX_train], X[iX_test]

# Held-out evaluation set prepared elsewhere (definitions not in view).
X_test_out = data_test_out
y_test_out = labels_test_out

#%%
from interpret.glassbox import ExplainableBoostingClassifier

# Fit an EBM on cohort 1 and predict labels for cohort 2.
ebm = ExplainableBoostingClassifier()
ebm.fit(data_pts_1, labels_pts_1)

labels_pt_2_pred = ebm.predict(data_pts_2)
#%%

# Try isolation forest for outlier detection
X = data_pts_1

from sklearn.ensemble import IsolationForest

# contamination=0.25 tells the forest to flag ~25% of rows as outliers.
clf = IsolationForest(random_state=0, n_jobs=-1, contamination=0.25).fit(X)

# IsolationForest.predict returns -1 for outliers, +1 for inliers.
A = clf.predict(X)

# Compare outlier rate vs. non-zero-label rate and their agreement.
# NOTE(review): `labels` is not defined in this chunk — presumably the labels
# aligned with data_pts_1; confirm against the surrounding file.
print((A == -1).mean(), (labels != 0).mean(),
      ((A == -1) == (labels != 0)).mean())

#%%
예제 #3
0
# Render the global explanation of a previously fit model (lr_global is
# built in an earlier, unseen cell).
show(lr_global)

# %% Fit decision tree model
tree = ClassificationTree()
tree.fit(X_train, y_train)
print("Training finished.")
y_pred = tree.predict(X_test)
# Macro-averaged F1 treats all classes equally regardless of support.
print(f"F1 Score {f1_score(y_test, y_pred, average='macro')}")
print(f"Accuracy {accuracy_score(y_test, y_pred)}")

# %% Explain local prediction
# Per-sample explanations for the first 100 test rows only (keeps the
# dashboard responsive).
tree_local = tree.explain_local(X_test[:100], y_test[:100], name='Tree')
show(tree_local)

# %% Fit Explainable Boosting Machine
# Fixed random_state for reproducible boosting/bagging inside the EBM.
ebm = ExplainableBoostingClassifier(random_state=2021)
ebm.fit(X_train, y_train) 
print("Training finished.")
y_pred = ebm.predict(X_test)
print(f"F1 Score {f1_score(y_test, y_pred, average='macro')}")
print(f"Accuracy {accuracy_score(y_test, y_pred)}")

# %% Explain locally
ebm_local = ebm.explain_local(X_test[:100], y_test[:100], name='EBM')
show(ebm_local)

# %% Explain globally
# Global term importances / shape functions for the fitted EBM.
ebm_global = ebm.explain_global(name='EBM')
show(ebm_global)
# %%
예제 #4
0
# Flush the matplotlib figure built in the preceding (unseen) cell.
plt.show()

# ### Explainable Boosting Machine

# In[9]:

from interpret.glassbox import ExplainableBoostingClassifier

# Fit an EBM with default hyperparameters on the training split.
ebm = ExplainableBoostingClassifier()
ebm.fit(train_X, train_y)

# In[12]:

# display confusion matrices for train and test data

# classificationSummary is an external helper (not defined in this chunk);
# presumably it prints accuracy + a confusion matrix — verify at its source.
classificationSummary(train_y, ebm.predict(train_X))
classificationSummary(test_y, ebm.predict(test_X))

# In[10]:

from interpret import show

# Global feature/term importance view for the fitted EBM.
ebm_global = ebm.explain_global()
show(ebm_global)

# In[ ]:

# Per-row explanations over the full test set.
ebm_local = ebm.explain_local(test_X, test_y)
show(ebm_local)

# ### RandomForest Regression Model
예제 #5
0
# Collect the per-fold index arrays up front so each split can be revisited
# by position. NOTE(review): train_idxes is appended below but initialized in
# an earlier, unseen line — confirm it starts as [] before this chunk.
test_idxes = []
for train_index, test_index in kf.split(X, y):
    train_idxes.append(train_index)
    test_idxes.append(test_index)

# One row per fold: [test acc, test AUPRC, test AUROC, cov AUPRC, cov AUROC].
splitwise_perf = []
for split in range(0, 5):
    # X is a pandas DataFrame (positional .iloc indexing); y is array-like.
    X_train, X_test = X.iloc[train_idxes[split], :], X.iloc[
        test_idxes[split], :]
    y_train, y_test = y[train_idxes[split]], y[test_idxes[split]]
    #X_train, X_test, X_cov = normalize_train_test_cov(X_train, X_test, X_cov)
    # Flatten to 1-D as sklearn-style estimators expect for y.
    y_train = y_train.ravel()
    clf = ExplainableBoostingClassifier(
        random_state=seed)  #, interactions=100)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    print(metrics.confusion_matrix(y_test, y_pred))
    curr_perf = []
    curr_perf += [metrics.accuracy_score(y_test, y_pred)]
    # Rebind y_pred to probabilities; column 1 is P(positive class).
    y_pred = clf.predict_proba(X_test)
    curr_perf += [get_aucpr(y_test, y_pred[:, 1])]
    curr_perf += [get_auc(y_test, y_pred[:, 1])]
    # Also evaluate the fold's model on the external (X_cov, y_cov) set,
    # which is shared across folds (defined outside this chunk).
    y_pred_cov = clf.predict(X_cov)
    print(metrics.confusion_matrix(y_cov, y_pred_cov))
    y_pred_cov = clf.predict_proba(X_cov)
    curr_perf += [get_aucpr(y_cov, y_pred_cov[:, 1])]
    curr_perf += [get_auc(y_cov, y_pred_cov[:, 1])]
    print(curr_perf)
    splitwise_perf.append(curr_perf)
    # save model
    #save_model(clf,format("models/ebm_humanpartners_1to1_no3mer_nonorm_split%d.pkl" % split))
예제 #6
0
            # One iteration of an outer loop (indentation implies nesting over
            # at least split / interac / trial — outer headers not in view).
            # Slice this fold's labels from the precomputed CV index arrays.
            y_train_cov, y_test_cov = y_cov[train_idxes_cov[split]], y_cov[
                test_idxes_cov[split]]

            #X_train_cov, y_train_cov = undersample_negatives(X_train_cov, y_train_cov, 50)

            # Flatten to 1-D as sklearn-style estimators expect for y.
            y_train_cov = y_train_cov.ravel()
            #clf = tune_ebm(X_train_cov, y_train_cov)

            # interac == 0 means "library default interactions"; otherwise the
            # sweep value is passed through explicitly.
            if interac == 0:
                clf = ExplainableBoostingClassifier()
            else:
                clf = ExplainableBoostingClassifier(interactions=interac)

            clf.fit(X_train_cov, y_train_cov)
            # curr_perf collects [AUPRC, AUROC, Fmax, early-precision...] via
            # external helpers (definitions not in view).
            curr_perf = []
            y_pred_cov = clf.predict(X_test_cov)
            #curr_perf += [metrics.accuracy_score(y_test_cov, y_pred_cov)]
            print(metrics.confusion_matrix(y_test_cov, y_pred_cov))
            # Rebind to probabilities; column 1 is P(positive class).
            y_pred_cov = clf.predict_proba(X_test_cov)
            curr_perf += [get_aucpr_R(y_test_cov, y_pred_cov[:, 1])]
            curr_perf += [get_auc_R(y_test_cov, y_pred_cov[:, 1])]
            curr_perf += [get_fmax(y_test_cov, y_pred_cov[:, 1])]
            # get_early_prec presumably returns a list, hence += not append.
            curr_perf += get_early_prec(y_test_cov, y_pred_cov[:, 1])
            print(curr_perf)
            splitwise_perf.append(curr_perf)
            # save model
            #save_model(clf,format("models//ebm_covonly_split%d_1to10_int%d.pkl" % (split, interac)))
            save_model(
                clf,
                format("%s/split%d_1to%d_int%d_trial%d.pkl" %
                       (out_dir, split, int(negfrac), interac, trial)))