Example 1
import pandas
from sklearn.ensemble import RandomForestClassifier
from kfold_template import run_kfold

# assumed setup: the same dataset.csv as in Example 5 below (30 feature columns)
dataset = pandas.read_csv("dataset.csv")

target = dataset.iloc[:, 30].values
data = dataset.iloc[:, 0:30].values

#print(target)
#print(data)

#machine = RandomForestClassifier(criterion = 'gini', max_depth=80, n_estimators = 11)

depth = [10, 20, 30, 50, 60]
for dep in depth:
    machine = RandomForestClassifier(criterion='gini',
                                     max_depth=dep,
                                     n_estimators=11)

    acc_score, conf_matrix = run_kfold(4, data, target, machine)
    print('\n\n')
    print('depth =', dep)
    print(acc_score)
    for i in conf_matrix:
        print(i)
'''
sample output from the commented-out max_depth=80 machine above:

depth = 80
accuracy_score:
[0.90474, 0.9046, 0.90586, 0.90568]

confusion_matrix:
[[  258   429     5     0]
 [   39 33098  1105     3]
 [    1  2342  7749   230]
 [    0     8   601  4132]]
'''
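# Note: every example in this section calls run_kfold from kfold_template,
# which is not included in the source. The sketch below is a minimal
# reconstruction consistent with the two call patterns used here; the names
# of the two trailing flags (with_r2, shuffle) are assumptions.
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score, confusion_matrix, r2_score


def run_kfold(n_splits, data, target, machine, with_r2=0, shuffle=0):
    """Cross-validate `machine`, returning per-fold scores.

    Returns (accuracy_scores, confusion_matrices) by default, or
    (r2_scores, accuracy_scores, confusion_matrices) when called as
    run_kfold(n, data, target, machine, 1, 1), as in Examples 3, 5 and 8.
    """
    kfold = KFold(n_splits=n_splits, shuffle=bool(shuffle))
    r2_scores, acc_scores, matrices = [], [], []
    for train_index, test_index in kfold.split(data):
        machine.fit(data[train_index], target[train_index])
        prediction = machine.predict(data[test_index])
        acc_scores.append(accuracy_score(target[test_index], prediction))
        matrices.append(confusion_matrix(target[test_index], prediction))
        if with_r2:
            r2_scores.append(r2_score(target[test_index], prediction))
    if with_r2:
        return r2_scores, acc_scores, matrices
    return acc_scores, matrices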
Example 2
import kfold_template
import pandas as pd
from sklearn import linear_model

dataset = pd.read_csv('regression_dataset.csv')

# y1 (column 1) is continuous, y2 (column 2) is binomial; logistic
# regression needs the binomial target, so here we use y2
# target = dataset.iloc[:, 1].values  # y1, continuous
target = dataset.iloc[:, 2].values  # y2, binomial
data = dataset.iloc[:, 3:].values
machine = linear_model.LogisticRegression()
acc_score, conf_matr = kfold_template.run_kfold(4, data, target, machine)

print(acc_score, conf_matr)

# print readable results
for item in conf_matr:
    print(item)
Example 3
import kfold_template
from matplotlib import pyplot as plt
from sklearn.datasets import make_blobs  # sklearn.datasets.samples_generator was removed in scikit-learn 0.24
from sklearn import svm

# Data generating
data, target = make_blobs(n_samples=400,
                          centers=2,
                          cluster_std=1,
                          random_state=0)

print(data)
print(target)
plt.scatter(data[:, 0], data[:, 1], c=target)
plt.savefig("plot.png")

r2_scores, accuracy_scores, confusion_matrices = kfold_template.run_kfold(
    5, data, target, svm.SVC(kernel="linear"), 1, 1)

print(r2_scores)
print(accuracy_scores)
for i in confusion_matrices:
    print(i)
Example 4
import kfold_template
from sklearn import linear_model
import pandas

dataset = pandas.read_csv("regression_dataset.csv")

target = dataset.iloc[:, 2].values
data = dataset.iloc[:, 3:9].values

machine = linear_model.LogisticRegression()

results_accuracy, results_confusion = kfold_template.run_kfold(
    4, data, target, machine)

print(results_accuracy)

for i in results_confusion:
    print(i)
Example 5
import kfold_template

import pandas

# from sklearn import tree
from sklearn.ensemble import RandomForestClassifier


dataset = pandas.read_csv("dataset.csv")

target = dataset.iloc[:, 30].values
data = dataset.iloc[:, 0:30].values

# print(target)
# print(data)

machine = RandomForestClassifier(n_estimators=21, criterion="gini", max_depth=30)

r2_scores, accuracy_scores, confusion_matrices = kfold_template.run_kfold(
    3, data, target, machine, 1, 1)

print(r2_scores)
print(accuracy_scores)
for i in confusion_matrices:
    print(i)
Example 6
import kfold_template
import pandas
from sklearn.ensemble import RandomForestClassifier

# note: the read_csv step that loads `dataset` is not shown in the original
sub_dataset = dataset.sample(n=1000)
#print(sub_dataset)

target = sub_dataset.iloc[:, 2].values
data = sub_dataset.iloc[:, [5, 6, 7, 8, 12, 16]].values

target = target.astype(int)
max_depth_list = [10]
n_estimators_list = [11]
results_list = []

for i in max_depth_list:
    machine = RandomForestClassifier(n_estimators=11,
                                     criterion='gini',
                                     max_depth=i)
    accuracy_score, confusion_matrix = kfold_template.run_kfold(
        4, data, target, machine)
    results_list.append(['Random Forest', accuracy_score, str(i)])
    for row in confusion_matrix:  # `row`, not `i`: avoid shadowing the loop variable
        print(row)

for k in n_estimators_list:
    machine = RandomForestClassifier(n_estimators=k,
                                     criterion='gini',
                                     max_depth=10)
    accuracy_score, confusion_matrix = kfold_template.run_kfold(
        4, data, target, machine)
    results_list.append(['Random Forest', accuracy_score, str(k)])
    for row in confusion_matrix:
        print(row)

results = pandas.DataFrame(results_list,
                           columns=['model', 'accuracy_scores', 'parameter'])
Example 7
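# The data-generation step of this example is not shown in the original. A
# minimal sketch of what it likely was, judging from plot_circles.png and the
# comments below about adding a 3rd dimension (the make_circles parameters
# here are assumptions):
import numpy
import kfold_template
from matplotlib import pyplot as plt
from sklearn import svm
from sklearn.datasets import make_circles

data, target = make_circles(n_samples=400, noise=0.05, factor=0.5,
                            random_state=0)
# Lift the 2-D rings into 3-D: z = x^2 + y^2 maps the inner and outer circles
# to two different heights, which a plane (i.e. a linear kernel) can separate.
third_dimension = (data[:, 0] ** 2 + data[:, 1] ** 2).reshape(-1, 1)
data = numpy.hstack([data, third_dimension])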
#plt.scatter(data[:,0], data[:,1], c=target)
#plt.savefig('plot_circles.png')



'''
Lifting the data into a higher dimension and then using a linear kernel can
give a more accurate result than applying a non-linear kernel directly.
'''


## [1.0, 0.99, 0.99, 1.0, 1.0] -- after adding the 3rd dimension, the linear
## kernel gives a much higher score
machine = svm.SVC(kernel='linear', tol=0.000001)
accu_score, conf_matrix = kfold_template.run_kfold(5, data, target, machine)
print(accu_score)
for i in conf_matrix:
    print(i)
Example 8
import kfold_template
import pandas
from sklearn import linear_model

dataset = pandas.read_csv("logistic_dataset.csv")
target = dataset.iloc[:, 2].values
data = dataset.iloc[:, 3:9].values

r2_scores, accuracy_scores, confusion_matrices = kfold_template.run_kfold(
    5, data, target,
    linear_model.LogisticRegression(multi_class="auto", solver="lbfgs"), 1, 1)

print(r2_scores)
print(accuracy_scores)
for confusion_matrix in confusion_matrices:
    print(confusion_matrix)

# refit a final model on the full dataset to predict new observations
machine = linear_model.LogisticRegression(multi_class="auto", solver="lbfgs")
machine.fit(data, target)

# new, unseen observations with the same six features as the training data
X = [
    [24, 55, 31, 3, 0, 7],
    [40, 50, 2, 5, 1, 8],
    [3, 95, 37, 3, 1, 15],
]

results = machine.predict(X)
print(results)