# NOTE(review): this chunk starts mid-script -- `dataset`,
# RandomForestClassifier and run_kfold are bound earlier in the file,
# outside this view.

# Column 30 holds the label; columns 0-29 are the features.
target = dataset.iloc[:, 30].values
data = dataset.iloc[:, 0:30].values
#print(target)
#print(data)

#machine = RandomForestClassifier(criterion = 'gini', max_depth=80, n_estimators = 11)

# Sweep over tree depth; each candidate is scored with 4-fold CV and its
# per-fold accuracies and confusion matrices are printed.
depth = [10, 20, 30, 50, 60]
for dep in depth:
    machine = RandomForestClassifier(criterion='gini', max_depth=dep, n_estimators=11)
    acc_score, conf_matrix = run_kfold(4, data, target, machine)
    print('\n\n')
    print('depth = ', str(dep))
    print(acc_score)
    for i in conf_matrix:
        print(i)

# Recorded results from an earlier run; the string literal continues past
# this view and its closing quotes are not visible here.
''' depth = 80 accuracy_score: [0.90474, 0.9046, 0.90586, 0.90568] confusion_matrix: [[ 258 429 5 0] [ 39 33098 1105 3] [ 1 2342 7749 230] [ 0 8 601 4132]]
import kfold_template
import pandas as pd
from sklearn import linear_model

# Load the regression dataset.  Column 1 is the continuous outcome (y1)
# and column 2 the binomial one (y2); logistic regression needs the
# binomial label, so column 2 is used, with columns 3+ as predictors.
frame = pd.read_csv('regression_dataset.csv')
labels = frame.iloc[:, 2].values
features = frame.iloc[:, 3:].values

# Score a logistic-regression classifier with 4-fold cross-validation.
classifier = linear_model.LogisticRegression()
fold_scores, fold_matrices = kfold_template.run_kfold(4, features, labels, classifier)

print(fold_scores, fold_matrices)
# One confusion matrix per fold, each on its own line for readability.
for fold_matrix in fold_matrices:
    print(fold_matrix)
import kfold_template
from matplotlib import pyplot as plt
# FIX: sklearn.datasets.samples_generator was deprecated in scikit-learn
# 0.22 and removed in 0.24; make_blobs lives directly in sklearn.datasets.
from sklearn.datasets import make_blobs
from sklearn import svm

# Generate a 2-D toy problem: 400 points in two Gaussian clusters
# (fixed random_state keeps the data reproducible).
data, target = make_blobs(n_samples=400, centers=2, cluster_std=1, random_state=0)
print(data)
print(target)

# Visualise the clusters, coloured by class label.
plt.scatter(data[:, 0], data[:, 1], c=target)
plt.savefig("plot.png")

# Linear-kernel SVM scored with 5-fold CV; the trailing 1, 1 flags ask
# run_kfold to also return r2 scores alongside accuracies and matrices.
r2_scores, accuracy_scores, confusion_matrices = kfold_template.run_kfold(
    5, data, target, svm.SVC(kernel="linear"), 1, 1)
print(r2_scores)
print(accuracy_scores)
for i in confusion_matrices:
    print(i)
import kfold_template
from sklearn import linear_model
import pandas

# Column 2 is the class label; columns 3-8 are the six predictors.
df = pandas.read_csv("regression_dataset.csv")
y = df.iloc[:, 2].values
X = df.iloc[:, 3:9].values

# Evaluate logistic regression with 4-fold cross-validation.
model = linear_model.LogisticRegression()
fold_accuracies, fold_matrices = kfold_template.run_kfold(4, X, y, model)

print(fold_accuracies)
# Print each fold's confusion matrix on its own line.
for matrix in fold_matrices:
    print(matrix)
import kfold_template
import pandas
# from sklearn import tree
from sklearn.ensemble import RandomForestClassifier

# Column 30 is the label; columns 0-29 are the features.
df = pandas.read_csv("dataset.csv")
y = df.iloc[:, 30].values
X = df.iloc[:, 0:30].values
# print(y)
# print(X)

# 21 gini trees capped at depth 30, scored with 3-fold CV; the trailing
# 1, 1 flags make run_kfold also return r2 scores alongside accuracies
# and confusion matrices.
forest = RandomForestClassifier(n_estimators=21, criterion="gini", max_depth=30)
r2_list, accuracy_list, matrix_list = kfold_template.run_kfold(3, X, y, forest, 1, 1)

print(r2_list)
print(accuracy_list)
for matrix in matrix_list:
    print(matrix)
sub_dataset = dataset.sample(n=1000) #print(sub_dataset) target = sub_dataset.iloc[:, 2].values data = sub_dataset.iloc[:, [5, 6, 7, 8, 12, 16]].values target = target.astype(int) max_depth_list = [10] n_estimators_list = [11] results_list = [] for i in max_depth_list: machine = RandomForestClassifier(n_estimators=11, criterion='gini', max_depth=i) accuracy_score, confusion_matrix = kfold_template.run_kfold( 4, data, target, machine) results_list.append(['Random Forest ', accuracy_score, str(i)]) for i in confusion_matrix: print(i) for k in n_estimators_list: machine = RandomForestClassifier(n_estimators=k, criterion='gini', max_depth=10) accuracy_score, confusion_matrix = kfold_template.run_kfold( 4, data, target, machine) results_list.append(['Random Forest ', accuracy_score, str(k)]) for i in confusion_matrix: print(i) results = pandas.DataFrame(results_list)
#plt.scatter(data[:,0], data[:,1], c=target) #plt.savefig('plot_circles.png') ''' try to use higher dimension and linear kernel, it will give you more accurate result than using non-linear kernel directly. ''' ## [1.0, 0.99, 0.99, 1.0, 1.0] after adding 3rd dimension, we have much higher ## score with linear kernel machine = svm.SVC(kernel='linear', tol = 0.000001) accu_score, conf_matrix = kfold_template.run_kfold(5, data, target, machine) print(accu_score) for i in conf_matrix: print(i)
import kfold_template
import pandas
from sklearn import linear_model

# Column 2 is the class label; columns 3-8 are the six predictors.
frame = pandas.read_csv("logistic_dataset.csv")
labels = frame.iloc[:, 2].values
features = frame.iloc[:, 3:9].values

# 5-fold CV; the trailing 1, 1 flags make run_kfold also return r2
# scores alongside accuracies and confusion matrices.
r2_scores, accuracy_scores, confusion_matrices = kfold_template.run_kfold(
    5, features, labels,
    linear_model.LogisticRegression(multi_class="auto", solver="lbfgs"), 1, 1)
print(r2_scores)
print(accuracy_scores)
for fold_matrix in confusion_matrices:
    print(fold_matrix)

# Refit on the full dataset, then classify three hand-written rows.
machine = linear_model.LogisticRegression(multi_class="auto", solver="lbfgs")
machine.fit(features, labels)
X = [
    [24, 55, 31, 3, 0, 7],
    [40, 50, 2, 5, 1, 8],
    [3, 95, 37, 3, 1, 15],
]
results = machine.predict(X)
print(results)