import pandas as pd
import matplotlib.pyplot as plt
from sklearn import tree
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import average_precision_score
import numpy as np
from sklearn.metrics import accuracy_score
from me_made_module import me
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import log_loss

# Obtain the train/test partition from the project helper. The argument 0.224
# is passed straight to me.get_split; its meaning is defined in that module.
X_train, X_test, Y_train, Y_test = me.get_split(0.224)

# Fit a decision tree with default hyper-parameters on the training partition
# (fit returns the estimator itself, so construction and fitting are chained).
clf = tree.DecisionTreeClassifier().fit(X_train, Y_train)

# Held-out partition: hard labels, plus the probabilities reduced to a single
# row. Transposing moves the class axis first, so index 1 picks the entry for
# the second class -- presumably the positive class; confirm via clf.classes_.
y_test_predict = clf.predict(X_test)
y_test_proba = np.array(clf.predict_proba(X_test)).T[1]

# Training partition: hard labels and the full probability matrix.
y_train_predict = clf.predict(X_train)
y_train_proba = np.array(clf.predict_proba(X_train))

# Plain-list copies of the ground-truth labels.
y_test = Y_test.tolist()
y_train = Y_train.tolist()
# Example no. 2
# 0
#     0.4,
#     0.45,
#     0.5,
#     0.55,
#     0.6,
#     0.65,
#     0.7
# ]
# Grid of 100 evenly spaced values from 0.215 to 0.23 (both ends included);
# the loop that follows passes each value to me.get_split.
schedule = np.linspace(start=0.215, stop=0.23, num=100)

# Five separate, initially empty result lists, one per metric. Presumably the
# loop that follows appends one aggregated score per schedule value to each.
rac_mean, ps_mean, rs_mean, f1_mean, ac_mean = ([] for _ in range(5))
for s in schedule:
    X_train, X_test, Y_train, Y_test = me.get_split(s)

    # train_record = []
    rac_record = []
    ps_record = []
    rs_record = []
    f1_record = []
    ac_record = []
    for model in range(200):
        clf = tree.DecisionTreeClassifier()
        clf = clf.fit(X_train, Y_train)

        y_test_predict = clf.predict(X_test)
        y_test_proba = clf.predict_proba(X_test)

        y_train_predict = clf.predict(X_train)