Example #1
import numpy as np
from sklearn.ensemble import RandomForestClassifier
import metrics  # project-local helper module (preprocessing and scoring utilities)
import matplotlib.pyplot as plt

# track the best feature count and its accuracy
maxi = 0
ideal = 0

num_features = []
accs = []

for features in [5, 6, 13]:
    data, target = metrics.preprocess(k=features, fsiter=100)
    temp = metrics.repeatedCrossValidatedScores(
        data,
        target,
        RandomForestClassifier(n_estimators=256),
        iterations=100,
        cv=10)
    metrics.printAverages(features, temp)

    num_features.append(features)
    acc = np.average(temp['test_accuracy'])
    accs.append(acc)
    # remember the best-performing feature count
    if acc > maxi:
        maxi = acc
        ideal = features

print(str(ideal) + " features gives " + str(maxi * 100) + "% accuracy")

plt.plot(num_features, accs, label='Accuracy')
plt.title("Feature Selection for Random Forest", fontsize=14)
plt.xlabel('Number of Features')
plt.ylabel('Repeated-Cross-Validation Accuracy (%)')
plt.yticks([0.85, 0.90, 0.95, 1], ["85", "90", "95", "100"])
plt.legend()
plt.show()
Example #2

import matplotlib.pyplot as plt
import metrics
import warnings
warnings.filterwarnings("ignore")

data, target = metrics.preprocess()

barHeights = metrics.FeatureSelection(data, target)
features = [
    'age', 'blood pressure', 'specific gravity', 'albumin', 'sugar',
    'red blood cells', 'pus cell', 'pus cell clumps', 'bacteria',
    'blood glucose random', 'blood urea', 'serum creatinine', 'sodium',
    'potassium', 'hemoglobin', 'packed cell volume', 'white blood cell count',
    'red blood cell count', 'hypertension', 'diabetes mellitus',
    'coronary artery disease', 'appetite', 'pedal edema', 'anemia'
]
fig, ax = plt.subplots()
plt.bar(range(1, 25), barHeights)
plt.subplots_adjust(bottom=0.28, left=0.1)
plt.yticks([0, 0.05, 0.1, 0.15, 0.20], ["0%", "5%", "10%", "15%", "20%"],
           size=7)
plt.xticks(range(1, 25), features, rotation=270, size=7)
plt.title('Importance of Each Feature', size=16)
plt.ylabel('Relative Importance (%)', size=8)
plt.show()
Example #3
import numpy as np
from sklearn.neural_network import MLPClassifier
import metrics
import warnings
warnings.filterwarnings("ignore")

data, target = metrics.preprocess(k=8, fsiter=1000)

# (16, 14, 11) is the best so far; (6, 3) was the best for two layers
hlayers = [6, (12, 5)]

param_grid = [{
    'hidden_layer_sizes': hlayers,
    'alpha': [0.01, 0.005, 0.001, 0.0005, 0.0001, 0.00005]
}]

# metrics.OptimizeClassifier(data, target, MLPClassifier(solver='lbfgs', random_state=1), param_grid)
# Current Best: 0.931 (+/-0.068) for {'activation': 'relu', 'alpha': 0.001, 'hidden_layer_sizes': 73}
# 0.938 (+/-0.069) for {'activation': 'relu', 'alpha': 0.0001, 'hidden_layer_sizes': 73}
# 0.951 (+/-0.057) for {'activation': 'relu', 'alpha': 0.0001, 'hidden_layer_sizes': 43}
# 0.945 (+/-0.085) for {'activation': 'relu', 'alpha': 1e-06, 'hidden_layer_sizes': 43}
# 0.948 (+/-0.057) for {'alpha': 0.01, 'hidden_layer_sizes': (35, 26)}
# 0.942 (+/-0.074) for {'alpha': 5e-05, 'hidden_layer_sizes': (30, 11)}
# 0.952 (+/-0.057) for {'alpha': 0.0001, 'hidden_layer_sizes': (30, 11)}
# 0.947 (+/-0.036) for {'alpha': 0.001, 'hidden_layer_sizes': (30, 11)}
#

# temporary values to be replaced
ideal = [0, 0, 0]
maxi = 0.
# graph = np.zeros(len(hlayers))
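# The snippet ends before param_grid is used; a minimal sketch of the
# search it sets up, using scikit-learn's GridSearchCV directly instead
# of the project-local metrics.OptimizeClassifier helper (an assumption):
from sklearn.model_selection import GridSearchCV

search = GridSearchCV(MLPClassifier(solver='lbfgs', random_state=1),
                      param_grid, scoring='accuracy', cv=10)
search.fit(data, target)
print(search.best_params_, search.best_score_)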
Example #4
import numpy as np
from numpy import interp  # scipy.interp was removed; numpy's interp is equivalent
import matplotlib.pyplot as plt
from itertools import cycle
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import roc_curve, auc
from sklearn.model_selection import StratifiedKFold
import metrics
from sklearn.preprocessing import label_binarize

# #############################################################################
# Data IO and generation

X, y = metrics.preprocess(k=8, fsiter=1000)
y = label_binarize(y, classes=["0", "1"]).ravel()

# #############################################################################
# Classification and ROC analysis

# Run classifier with cross-validation and plot ROC curves
cv = StratifiedKFold(n_splits=10, shuffle=True)
classifier = KNeighborsClassifier(n_neighbors=1)

tprs = []
aucs = []
mean_fpr = np.linspace(0, 1, 100)

i = 0
for iterations in range(10000):
    for train, test in cv.split(X, y):
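        # The loop body is truncated in the source; a minimal sketch of the
        # standard per-fold ROC computation implied by the setup above,
        # assuming X and y are NumPy arrays indexable by fold indices:
        probas = classifier.fit(X[train], y[train]).predict_proba(X[test])
        fpr, tpr, _ = roc_curve(y[test], probas[:, 1])
        tprs.append(interp(mean_fpr, fpr, tpr))
        tprs[-1][0] = 0.0
        aucs.append(auc(fpr, tpr))
        i += 1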
Example #5

import arff
import numpy as np
from sklearn.impute import SimpleImputer  # replaces the removed sklearn.preprocessing.Imputer
from sklearn.neural_network import MLPClassifier
from imblearn.over_sampling import SMOTE
import metrics
import warnings
warnings.filterwarnings("ignore")

data, target = metrics.preprocess(k=8, fsiter=1000, scaling=False)

# default values
ideal = [0]
maxi = 0

# check a lot of hidden layer configurations for sets with high accuracy
print("hlayers/tp/tn/fp/fn/f1/precision/sensitivity/specificity/accuracy")
for x in range(1, 100):
    temp = metrics.repeatedCrossValidatedScores(data,
                                                target,
                                                MLPClassifier(
                                                    solver='lbfgs',
                                                    alpha=1e-5,
                                                    hidden_layer_sizes=x,
                                                    random_state=1,
                                                ),
                                                iterations=20,
                                                cv=10)
    metrics.printAverages(x, temp)
    if np.average(temp['test_f1']) > maxi:
        maxi = np.average(temp['test_f1'])
        ideal = [x]  # remember the best hidden-layer size

print(str(ideal) + " gives " + str(maxi) + " F1")
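# SMOTE is imported above but never reached in the truncated snippet; a
# hypothetical sketch of the call that would typically precede the scoring
# loop, rebalancing the classes (fit_resample is the imblearn API):
smote = SMOTE(random_state=1)
data_resampled, target_resampled = smote.fit_resample(data, target)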
Example #6
import metrics
import numpy as np
import matplotlib.pyplot as plt
from sklearn import tree
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier

data, target = metrics.preprocess()
classifiers = {
    'Support Vector Machine': svm.SVC(C=1,
                                      kernel='linear',
                                      decision_function_shape='ovo',
                                      random_state=6),
    'Random Forest': RandomForestClassifier(n_estimators=184),
    'Logistic Regression': LogisticRegression(C=1000),
    'Nearest Neighbours': KNeighborsClassifier(n_neighbors=1),
    'Decision Tree': tree.DecisionTreeClassifier(),
    'Neural Network': MLPClassifier(solver='lbfgs',
                                    alpha=0.001,
                                    hidden_layer_sizes=54,
                                    random_state=1)
}
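# The snippet ends at the dict; a plausible continuation, mirroring the
# earlier examples, scores each classifier with the project's helpers
# (the iteration counts here are assumptions):
for name, clf in classifiers.items():
    scores = metrics.repeatedCrossValidatedScores(data, target, clf,
                                                  iterations=20, cv=10)
    metrics.printAverages(name, scores)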