# Example #1
                               n_estimators=1000)
#96.33 accuracy with ktree =RandomForestClassifier(criterion='gini', max_features='log2',random_state=10, max_depth=15,n_estimators=1000)
# Fit the tuned random forest and write the Kaggle submission file.
y = np.ravel(Y_train)  # flatten the label column to the 1-D shape fit() expects
ktree.fit(X_train, y)
kaggle_prediction = ktree.predict(X_kaggle)
# One-column frame of predictions; shift to a 1-based index so the exported
# row ids match Kaggle's expected numbering.
frame = pd.DataFrame({'class': kaggle_prediction})
frame.index += 1
frame.to_csv(
    r'C:\Users\Vinni\Desktop\Sem 3 Spring 2021\CS 529 ML\Prog1 Decision Trees\Gene\gene_1\my_gene_kaggle.csv',
    index_label='id')

#PLOTS
#1) ALPHA VALUE AND ALPHA VS ACCURACY PLOT
# Minimal cost-complexity pruning sweep: fit one decision tree per candidate
# alpha from cost_complexity_pruning_path, then score each on train/test so
# the alpha-vs-accuracy trade-off can be plotted below.
clf = DecisionTreeClassifier(random_state=20)
path = clf.cost_complexity_pruning_path(X_train, Y_train)
ccp_alphas, impurities = path.ccp_alphas, path.impurities
clfs = []
# Drop the largest alpha: per the sklearn pruning example it collapses the
# tree to a single root node, which is not an interesting data point.
ccp_alphas = ccp_alphas[:-1]
for ccp_alpha in ccp_alphas:
    clf = DecisionTreeClassifier(random_state=20, ccp_alpha=ccp_alpha)
    clf.fit(X_train, Y_train)
    clfs.append(clf)
# Fix: message previously said "cpp_alpha"; the sklearn parameter is ccp_alpha.
print("Number of nodes in the last tree is:{} with ccp_alpha: {}".format(
    clfs[-1].tree_.node_count, ccp_alphas[-1]))

train_scores = [clf.score(X_train, Y_train) for clf in clfs]
test_scores = [clf.score(X_test, Y_test) for clf in clfs]

fig, ax = plt.subplots()
ax.set_xlabel("alpha")