Esempio n. 1
0
import pandas as pd
import numpy as np
from sklearn.ensemble import ExtraTreesClassifier
import matplotlib.pyplot as plt
from common.import_data import ImportData

if __name__ == "__main__":
    # Rank the input features by the importance an ExtraTreesClassifier
    # assigns to them and show the ranking as a horizontal bar chart.
    #
    # Every statement lives under the main guard so that importing this
    # module neither runs the experiment nor fails on an undefined
    # ``data_set``.

    # Labels for the entries of ``feature_importances_``.
    # NOTE(review): assumes the columns returned by import_train_data()
    # come in exactly this order -- confirm against ImportData.
    feature_names = [
        'fixed acidity', 'volatile acidity',
        'citric acid', 'residual sugar', 'chlorides',
        'free sulfur dioxide', 'total sulfur dioxide',
        'density', 'ph', 'sulphates', 'alcohol',
    ]

    data_set = ImportData()
    x: np.ndarray = data_set.import_train_data()
    y: np.ndarray = data_set.import_columns_train(np.array(['quality']))
    # Currently unused; the call is kept because its side effects (if any)
    # are not visible from this script.
    name_of_columns: np.ndarray = data_set.import_names_of_columns()

    model = ExtraTreesClassifier()
    model.fit(x, y)
    print(model.feature_importances_)

    feat_importances = pd.Series(model.feature_importances_,
                                 index=feature_names)
    # Asking nlargest() for every feature yields the full list sorted by
    # importance, which is what gets plotted.
    feat_importances.nlargest(len(feature_names)).plot(kind='barh')
    plt.xlabel("Znaczenie cech")
    plt.show()
Esempio n. 2
0
import numpy as np

from common.import_data import ImportData
from collections import Counter
from imblearn.under_sampling import AllKNN

if __name__ == "__main__":
    # Report the class distribution of the data set before and after
    # resampling it with imbalanced-learn's AllKNN under-sampler.
    loader = ImportData()
    features: np.ndarray = loader.import_all_data()
    labels: np.ndarray = loader.import_columns(np.array(['Class'])).ravel()

    counts_before = Counter(labels)
    print('Original dataset shape %s' % counts_before)

    # Default-configured sampler; only the resampled labels are needed
    # for the report, so the resampled features are discarded.
    sampler = AllKNN()
    _, labels_resampled = sampler.fit_resample(features, labels)

    counts_after = Counter(labels_resampled)
    print('Reduced dataset shape %s' % counts_after)
Esempio n. 3
0
import numpy as np

from sklearn.neighbors import KNeighborsClassifier
from common.import_data import ImportData
from sklearn.model_selection import KFold
import sklearn.metrics as metrics

if __name__ == "__main__":
    # Build a hold-out test set from the full poker-hand data, then load
    # the AllKNN-reduced data set and draw its first cross-validation fold.
    feature_columns = ['s1', 'r1', 's2', 'r2', 's3',
                       'r3', 's4', 'r4', 's5', 'r5']
    reduced_csv = './dataset3/reduced_dataset_AllKNN_pokerhand.csv'

    data_set = ImportData()

    # Flattened class labels used to build the hold-out labels below.
    y: np.ndarray = data_set.import_classes(['class']).ravel()

    full_features: np.ndarray = data_set.import_columns(
        './dataset3/pokerhand.csv', list(feature_columns))

    # KFold.split yields (train_indices, test_indices); the test part of
    # the first shuffled fold out of ten serves as the hold-out set.
    holdout_splitter = KFold(n_splits=10, shuffle=True)
    first_fold = next(holdout_splitter.split(full_features), None)
    holdout_idx = first_fold[1]
    x_test = full_features[holdout_idx]
    y_test = y[holdout_idx]

    # From here on ``y`` refers to the labels of the reduced data set.
    kf_AllKNN = KFold(n_splits=5, shuffle=True)
    x_AllKNN: np.ndarray = data_set.import_columns(
        reduced_csv, list(feature_columns))
    y = data_set.import_classes_for_reduced_datasets(
        reduced_csv, np.array(['class']))

    # First (train, test) index pair of the shuffled 5-fold split.
    result_AllKNN = next(kf_AllKNN.split(x_AllKNN), None)
Esempio n. 4
0
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from common.import_data import ImportData

if __name__ == "__main__":
    # Train a multi-layer perceptron on an 80/20 split of the data and
    # print its predictions and its score on the held-out part.
    data_set = ImportData()

    features = data_set.import_all_data()
    quality = data_set.import_columns(np.array(['quality']))
    x_train, x_test, y_train, y_test = train_test_split(
        features, quality, test_size=0.2, random_state=13)

    classifier = MLPClassifier(
        hidden_layer_sizes=(21, 3),
        solver='adam',
        alpha=0.0001,
        max_iter=2000,
        random_state=1,
        verbose=1,
    )
    # The label arrays are flattened before being handed to the estimator.
    classifier.fit(x_train, y_train.ravel())

    # NOTE(review): predictions are printed for the *training* split while
    # the score below uses the test split -- confirm this is intended.
    train_predictions = classifier.predict(x_train)
    print(train_predictions)

    test_score = classifier.score(x_test, y_test.ravel())
    print(round(test_score, 4))
Esempio n. 5
0
import numpy as np

from sklearn.model_selection import train_test_split
from common.import_data import ImportData
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier

if __name__ == "__main__":
    # Fit a 100-tree random forest on an 80/20 split of the data and print
    # the true labels, the predicted labels and the resulting accuracy.
    data_set = ImportData()

    features = data_set.import_all_data()
    classes = data_set.import_columns(np.array(['Class']))
    x_train, x_test, y_train, y_test = train_test_split(
        features, classes, test_size=0.2, random_state=0)

    forest = RandomForestClassifier(n_estimators=100)
    forest.fit(x_train, y_train.ravel())

    predicted = forest.predict(x_test)
    print(y_test)
    print(predicted)

    # NOTE(review): the prediction is flattened here while y_test is passed
    # exactly as train_test_split returned it -- confirm that is intended.
    accuracy = accuracy_score(y_test, predicted.ravel())
    print(accuracy)