"""Fit an ExtraTreesClassifier on the training data and plot feature importances."""
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.ensemble import ExtraTreesClassifier

from common.import_data import ImportData

# Labels used as the index of the importance series.
# NOTE(review): assumed to be in the same order as the columns returned by
# ImportData.import_train_data() -- confirm against the loader.
FEATURE_NAMES = [
    'fixed acidity',
    'volatile acidity',
    'citric acid',
    'residual sugar',
    'chlorides',
    'free sulfur dioxide',
    'total sulfur dioxide',
    'density',
    'ph',
    'sulphates',
    'alcohol',
]


def main() -> None:
    """Train the model, print the raw importances and show them as a bar chart."""
    data_set = ImportData()
    x: np.ndarray = data_set.import_train_data()
    # Flatten the single requested target column to 1-D. scikit-learn warns
    # (DataConversionWarning) when a column vector is passed as ``y``.
    y: np.ndarray = np.ravel(
        data_set.import_columns_train(np.array(['quality'])))

    model = ExtraTreesClassifier()
    model.fit(x, y)
    importances = model.feature_importances_
    print(importances)

    feat_importances = pd.Series(importances, index=FEATURE_NAMES)
    # Plot every feature, largest importance first; the count follows the
    # series length instead of a hard-coded number.
    feat_importances.nlargest(len(feat_importances)).plot(kind='barh')
    # The axis label is Polish for "feature importance".
    plt.xlabel("Znaczenie cech")
    plt.show()


if __name__ == "__main__":
    main()
"""Print the class distribution before and after AllKNN under-sampling."""
from collections import Counter

import numpy as np
from imblearn.under_sampling import AllKNN

from common.import_data import ImportData


def main() -> None:
    """Load the data, resample it with AllKNN and print both class counts."""
    loader = ImportData()
    features: np.ndarray = loader.import_all_data()
    class_column: np.ndarray = loader.import_columns(np.array(['Class']))
    labels: np.ndarray = class_column.ravel()
    print('Original dataset shape %s' % Counter(labels))

    sampler = AllKNN()
    # Only the resampled labels are needed for the report below.
    _, reduced_labels = sampler.fit_resample(features, labels)
    print('Reduced dataset shape %s' % Counter(reduced_labels))


if __name__ == "__main__":
    main()
"""Prepare K-fold splits of the full and AllKNN-reduced poker-hand data."""
import numpy as np
import sklearn.metrics as metrics
from sklearn.model_selection import KFold
from sklearn.neighbors import KNeighborsClassifier

from common.import_data import ImportData


def main() -> None:
    """Load both datasets and take the first fold of a shuffled K-fold split.

    NOTE(review): nothing computed here is consumed afterwards (no model is
    trained or scored), so the script looks unfinished -- confirm.
    """
    feature_columns = ('s1', 'r1', 's2', 'r2', 's3',
                       'r3', 's4', 'r4', 's5', 'r5')
    full_csv = './dataset3/pokerhand.csv'
    reduced_csv = './dataset3/reduced_dataset_AllKNN_pokerhand.csv'

    data_set = ImportData()
    y: np.ndarray = data_set.import_classes(['class']).ravel()
    x_full: np.ndarray = data_set.import_columns(
        full_csv, list(feature_columns))

    # Hold-out set: the test indices of the first of 10 shuffled folds.
    holdout_splitter = KFold(n_splits=10, shuffle=True)
    first_fold = next(holdout_splitter.split(x_full), None)
    test_indices = first_fold[1]
    x_test = x_full[test_indices]
    y_test = y[test_indices]

    # Same procedure on the reduced dataset, with 5 folds.
    reduced_splitter = KFold(n_splits=5, shuffle=True)
    x_reduced: np.ndarray = data_set.import_columns(
        reduced_csv, list(feature_columns))
    # From here on ``y`` holds the labels of the reduced dataset.
    y = data_set.import_classes_for_reduced_datasets(
        reduced_csv, np.array(['class']))
    reduced_first_fold = next(reduced_splitter.split(x_reduced), None)


if __name__ == "__main__":
    main()
"""Train an MLPClassifier on the 'quality' target and report test accuracy."""
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier

from common.import_data import ImportData


def main() -> None:
    """Split the data 80/20, fit the network and print predictions and score."""
    data_set = ImportData()
    features = data_set.import_all_data()
    target = data_set.import_columns(np.array(['quality']))
    x_train, x_test, y_train, y_test = train_test_split(
        features, target, test_size=0.2, random_state=13)

    network = MLPClassifier(
        solver='adam',
        alpha=0.0001,
        hidden_layer_sizes=(21, 3),
        random_state=1,
        max_iter=2000,
        verbose=1,
    )
    network.fit(x_train, y_train.ravel())

    # NOTE(review): predictions are printed for the training split, while the
    # accuracy below is measured on the test split -- confirm this is intended.
    predictions = network.predict(x_train)
    print(predictions)

    test_accuracy = network.score(x_test, y_test.ravel())
    print(round(test_accuracy, 4))


if __name__ == "__main__":
    main()
"""Train a random forest on the 'Class' target and print its test accuracy."""
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

from common.import_data import ImportData


def main() -> None:
    """Split the data 80/20, fit the forest and print labels and accuracy."""
    data_set = ImportData()
    features = data_set.import_all_data()
    labels = data_set.import_columns(np.array(['Class']))
    x_train, x_test, y_train, y_test = train_test_split(
        features, labels, test_size=0.2, random_state=0)

    forest = RandomForestClassifier(n_estimators=100)
    forest.fit(x_train, y_train.ravel())
    predicted = forest.predict(x_test)

    # True labels first, then the predictions, then the accuracy score.
    print(y_test)
    print(predicted)
    print(accuracy_score(y_test, predicted.ravel()))


if __name__ == "__main__":
    main()