Example #1
import numpy as np
from sklearn.ensemble import RandomForestClassifier
import metrics  # project-local helper module (preprocessing and scoring utilities)
import matplotlib.pyplot as plt

# track the best feature count and its accuracy
maxi = 0
ideal = 0

num_features = []
accs = []

for features in [5, 6, 13]:
    data, target = metrics.preprocess(k=features, fsiter=100)
    temp = metrics.repeatedCrossValidatedScores(
        data,
        target,
        RandomForestClassifier(n_estimators=256),
        iterations=100,
        cv=10)
    metrics.printAverages(features, temp)

    num_features.append(features)
    acc = np.average(temp['test_accuracy'])
    accs.append(acc)
    # remember the best-performing feature count
    if acc > maxi:
        maxi = acc
        ideal = features

print(str(ideal) + " features gives " + str(maxi * 100) + "% accuracy")

plt.plot(num_features, accs, label='Accuracy')
plt.title("Feature Selection for Random Forest", fontsize=14)
plt.xlabel('Number of Features')
plt.ylabel('Repeated-Cross-Validation Accuracy (%)')
plt.yticks([0.85, 0.90, 0.95, 1], ["85", "90", "95", "100"])
plt.legend()
plt.show()
Example #2

import matplotlib.pyplot as plt
import metrics
import warnings
warnings.filterwarnings("ignore")

data, target = metrics.preprocess()

barHeights = metrics.FeatureSelection(data, target)
features = [
    'age', 'blood pressure', 'specific gravity', 'albumin', 'sugar',
    'red blood cells', 'pus cell', 'pus cell clumps', 'bacteria',
    'blood glucose random', 'blood urea', 'serum creatinine', 'sodium',
    'potassium', 'hemoglobin', 'packed cell volume', 'white blood cell count',
    'red blood cell count', 'hypertension', 'diabetes mellitus',
    'coronary artery disease', 'appetite', 'pedal edema', 'anemia'
]
fig, ax = plt.subplots()
plt.bar(range(1, 25), barHeights)
plt.subplots_adjust(bottom=0.28, left=0.1)
plt.yticks([0, 0.05, 0.1, 0.15, 0.20], ["0%", "5%", "10%", "15%", "20%"],
           size=7)
plt.xticks(range(1, 25), features, rotation=270, size=7)
plt.title('Importance of Each Feature', size=16)
plt.ylabel('Relative Importance (%)', size=8)
plt.show()
Example #3
import numpy as np
from sklearn.neural_network import MLPClassifier
import metrics
import warnings
warnings.filterwarnings("ignore")

data, target = metrics.preprocess(k=8, fsiter=1000)

# (16, 14, 11) is the best so far; (6, 3) was the best for two layers
hlayers = [6, (12, 5)]

param_grid = [{
    'hidden_layer_sizes': hlayers,
    'alpha': [0.01, 0.005, 0.001, 0.0005, 0.0001, 0.00005]
}]

# metrics.OptimizeClassifier(data, target, MLPClassifier(solver='lbfgs', random_state=1), param_grid)
# Current Best: 0.931 (+/-0.068) for {'activation': 'relu', 'alpha': 0.001, 'hidden_layer_sizes': 73}
# 0.938 (+/-0.069) for {'activation': 'relu', 'alpha': 0.0001, 'hidden_layer_sizes': 73}
# 0.951 (+/-0.057) for {'activation': 'relu', 'alpha': 0.0001, 'hidden_layer_sizes': 43}
# 0.945 (+/-0.085) for {'activation': 'relu', 'alpha': 1e-06, 'hidden_layer_sizes': 43}
# 0.948 (+/-0.057) for {'alpha': 0.01, 'hidden_layer_sizes': (35, 26)}
# 0.942 (+/-0.074) for {'alpha': 5e-05, 'hidden_layer_sizes': (30, 11)}
# 0.952 (+/-0.057) for {'alpha': 0.0001, 'hidden_layer_sizes': (30, 11)}
# 0.947 (+/-0.036) for {'alpha': 0.001, 'hidden_layer_sizes': (30, 11)}
#

# temporary values to be replaced
ideal = [0, 0, 0]
maxi = 0.
# graph = np.zeros(len(hlayers))
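# The snippet ends before param_grid is used; a minimal sketch of the
# search it sets up, using scikit-learn's GridSearchCV directly instead
# of the project-local metrics.OptimizeClassifier helper (an assumption):
from sklearn.model_selection import GridSearchCV

search = GridSearchCV(MLPClassifier(solver='lbfgs', random_state=1),
                      param_grid, scoring='accuracy', cv=10)
search.fit(data, target)
print(search.best_params_, search.best_score_)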
Example #4
import numpy as np
from numpy import interp  # scipy.interp was removed; numpy's interp is equivalent
import matplotlib.pyplot as plt
from itertools import cycle
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import roc_curve, auc
from sklearn.model_selection import StratifiedKFold
import metrics
from sklearn.preprocessing import label_binarize

# #############################################################################
# Data IO and generation

X, y = metrics.preprocess(k=8, fsiter=1000)
y = label_binarize(y, classes=["0", "1"]).ravel()

# #############################################################################
# Classification and ROC analysis

# Run classifier with cross-validation and plot ROC curves
cv = StratifiedKFold(n_splits=10, shuffle=True)
classifier = KNeighborsClassifier(n_neighbors=1)

tprs = []
aucs = []
mean_fpr = np.linspace(0, 1, 100)

i = 0
for iterations in range(10000):
    for train, test in cv.split(X, y):
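        # The loop body is truncated in the source; a minimal sketch of the
        # standard per-fold ROC computation implied by the setup above,
        # assuming X and y are NumPy arrays indexable by fold indices:
        probas = classifier.fit(X[train], y[train]).predict_proba(X[test])
        fpr, tpr, _ = roc_curve(y[test], probas[:, 1])
        tprs.append(interp(mean_fpr, fpr, tpr))
        tprs[-1][0] = 0.0
        aucs.append(auc(fpr, tpr))
        i += 1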
Example #5

import arff
import numpy as np
from sklearn.impute import SimpleImputer  # replaces the removed sklearn.preprocessing.Imputer
from sklearn.neural_network import MLPClassifier
from imblearn.over_sampling import SMOTE
import metrics
import warnings
warnings.filterwarnings("ignore")

data, target = metrics.preprocess(k=8, fsiter=1000, scaling=False)

# default values
ideal = [0]
maxi = 0

# check a lot of hidden layer configurations for sets with high accuracy
print("hlayers/tp/tn/fp/fn/f1/precision/sensitivity/specificity/accuracy")
for x in range(1, 100):
    temp = metrics.repeatedCrossValidatedScores(data,
                                                target,
                                                MLPClassifier(
                                                    solver='lbfgs',
                                                    alpha=1e-5,
                                                    hidden_layer_sizes=x,
                                                    random_state=1,
                                                ),
                                                iterations=20,
                                                cv=10)
    metrics.printAverages(x, temp)
    if np.average(temp['test_f1']) > maxi:
        maxi = np.average(temp['test_f1'])
        ideal = [x]  # remember the best hidden-layer size

print(str(ideal) + " gives " + str(maxi) + " F1")
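# SMOTE is imported above but never reached in the truncated snippet; a
# hypothetical sketch of the call that would typically precede the scoring
# loop, rebalancing the classes (fit_resample is the imblearn API):
smote = SMOTE(random_state=1)
data_resampled, target_resampled = smote.fit_resample(data, target)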
Example #6
import metrics
import numpy as np
import matplotlib.pyplot as plt
from sklearn import tree
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier

data, target = metrics.preprocess()
classifiers = {
    'Support Vector Machine': svm.SVC(C=1,
                                      kernel='linear',
                                      decision_function_shape='ovo',
                                      random_state=6),
    'Random Forest': RandomForestClassifier(n_estimators=184),
    'Logistic Regression': LogisticRegression(C=1000),
    'Nearest Neighbours': KNeighborsClassifier(n_neighbors=1),
    'Decision Tree': tree.DecisionTreeClassifier(),
    'Neural Network': MLPClassifier(solver='lbfgs',
                                    alpha=0.001,
                                    hidden_layer_sizes=54,
                                    random_state=1)
}
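# The snippet ends at the dict; a plausible continuation, mirroring the
# earlier examples, scores each classifier with the project's helpers
# (the iteration counts here are assumptions):
for name, clf in classifiers.items():
    scores = metrics.repeatedCrossValidatedScores(data, target, clf,
                                                  iterations=20, cv=10)
    metrics.printAverages(name, scores)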