def train_script_2():
    dbreader = DbReader(PATH, split_size=ONE_PERSON_SPLIT)
    # First two signal/transcript pairs go to training, the rest to validation
    training_commands = getting_commands_from_signals(
        dbreader.training_signals[:2], dbreader.training_text[:2])
    valid_commands = getting_commands_from_signals(
        dbreader.training_signals[2:], dbreader.training_text[2:])
    training_mfcc_data = simple_mfcc(training_commands)
    valid_mfcc_data = simple_mfcc(valid_commands)
    y_train = training_mfcc_data['command']
    X_train = training_mfcc_data.drop(columns=['command'])
    y_valid = valid_mfcc_data['command']
    X_valid = valid_mfcc_data.drop(columns=['command'])
    rf_model = RandomForestModel()
    model_to_fit = rf_model.gridsearchCV()
    model_to_fit.fit(X_train, y_train)
    rf_model.set_internal_model(model_to_fit.best_estimator_)
    print(model_to_fit.best_estimator_)
    rf_model.save_model()
    joblib.dump(dbreader, "dbreader.mdl")
    predictions = rf_model.predict(X_valid)
    plot_confusion_matrix(y_valid, predictions)

def evaluating_script():
    db_reader = DbReader()
    hyper_dataset = db_reader.load_csv("../allhyper.test")
    hypo_dataset = db_reader.load_csv("../allhypo.test")
    X, y = create_dataset_for_evaluation(hyper_dataset, hypo_dataset)
    X = preprocess_the_data(X)
    rf_model = RandomForestModel()
    loaded_model = rf_model.load_model()
    # Restore the feature subset selected during training
    with open('selected_best_features.data', 'rb') as filehandle:
        filtered_features = pickle.load(filehandle)
    predicted_values = loaded_model.predict(X[filtered_features])
    print(rf_model.__class__.__name__)
    print(classification_report(y, predicted_values))

def train_script(training_X, training_y):
    models = [
        LinearRegressionModel(),
        SVM_Model(),
        LogisticRegressionModel(),
        RandomForestModel()
    ]
    for model in models:
        fitting_model = model.gridsearchCV()
        fitting_model.fit(training_X, training_y)
        print(fitting_model.best_score_)
        model.set_internal_model(fitting_model.best_estimator_)
        model.save_model()

def evaluating_script(test_X, test_y):
    models = [
        LinearRegressionModel(),
        SVM_Model(),
        LogisticRegressionModel(),
        RandomForestModel()
    ]
    for model in models:
        loaded_model = model.load_model()
        predicted_values = loaded_model.predict(test_X)
        print(model.__class__.__name__)
        print(classification_report(test_y, predicted_values))

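The training and evaluation scripts all drive their models through the same thin wrapper interface: gridsearchCV(), set_internal_model(), save_model(), and load_model(). A minimal sketch of what one such wrapper might look like, assuming it delegates to scikit-learn and persists estimators with joblib; the method names come from the scripts above, while the parameter grid, file name, and internals are assumptions:

import joblib
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV


class RandomForestModel:
    def __init__(self):
        self.internal_model = RandomForestClassifier()
        # Hypothetical grid; the project's actual grid is not shown here
        self.param_grid = {'n_estimators': [100, 300], 'max_depth': [10, 30]}

    def gridsearchCV(self):
        # Return an unfitted GridSearchCV; the caller runs .fit() on it and
        # reads .best_estimator_ (refit=True by default makes it available)
        return GridSearchCV(self.internal_model, self.param_grid, cv=5)

    def set_internal_model(self, estimator):
        self.internal_model = estimator

    def predict(self, X):
        return self.internal_model.predict(X)

    def save_model(self, path="random_forest.mdl"):
        joblib.dump(self.internal_model, path)

    def load_model(self, path="random_forest.mdl"):
        self.internal_model = joblib.load(path)
        return self.internal_model
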
def train_script():
    dbreader = DbReader(PATH, split_size=ONE_PERSON_SPLIT)
    commands = getting_commands_from_signals(dbreader.training_signals,
                                             dbreader.training_text)
    mfcc_data = simple_mfcc(commands)
    y_train = mfcc_data['command']
    X_train = mfcc_data.drop(columns=['command'])
    rf_model = RandomForestModel()
    model_to_fit = rf_model.gridsearchCV()
    model_to_fit.fit(X_train, y_train)
    rf_model.set_internal_model(model_to_fit.best_estimator_)
    print(model_to_fit.best_estimator_)
    rf_model.save_model()
    joblib.dump(dbreader, "dbreader.mdl")

def train_script():
    db_reader = DbReader()
    hyper_dataset = db_reader.load_csv("../allhyper.data")
    hypo_dataset = db_reader.load_csv("../allhypo.data")
    X, y = create_dataset_for_training(hyper_dataset, hypo_dataset)
    X = preprocess_the_data(X)
    rf_model = RandomForestModel()
    filtered_features = feature_selection(X, y, rf_model.internal_model)
    # Persist the selected features so the evaluation script can reuse them
    with open('selected_best_features.data', 'wb') as filehandle:
        pickle.dump(filtered_features, filehandle)
    model_to_fit = rf_model.gridsearchCV()
    model_to_fit.fit(X[filtered_features], y)
    print(model_to_fit.best_score_)
    print(model_to_fit.best_params_)
    print(filtered_features)
    rf_model.set_internal_model(model_to_fit.best_estimator_)
    rf_model.save_model()

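feature_selection() itself is not listed. One plausible implementation, assuming X is a pandas DataFrame and using scikit-learn's SelectFromModel; the project's actual selection method may differ:

from sklearn.feature_selection import SelectFromModel


def feature_selection(X, y, estimator):
    # Fit the estimator and keep only the columns whose importance clears
    # SelectFromModel's default (mean-importance) threshold; returning a
    # plain list keeps the result easy to pickle and to use as X[features]
    selector = SelectFromModel(estimator)
    selector.fit(X, y)
    return list(X.columns[selector.get_support()])
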
from Model import RandomForestModel

# Test 1
model = RandomForestModel(X_train=[[1, 2, 3], [11, 12, 13]],
                          y_train=[0, 1],
                          X_test=[[3, 4, 1], [14, 11, 17]],
                          n_estimators=1)
model.fit()
out = list(model.predict())
desired_out = [0, 1]
print("Desired out:\t" + str(desired_out))
print("Actual out:\t" + str(out))
for index in range(len(out)):
    if out[index] != desired_out[index]:
        print("Test 1 failed")
        exit(0)
print("Test 1 passed")

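The same check can be made self-reporting with an assertion instead of print-and-exit; this variant is illustrative only and assumes the same constructor signature as Test 1 above:

# Test 1, assert-based variant: a mismatch raises AssertionError
model = RandomForestModel(X_train=[[1, 2, 3], [11, 12, 13]], y_train=[0, 1],
                          X_test=[[3, 4, 1], [14, 11, 17]], n_estimators=1)
model.fit()
assert list(model.predict()) == [0, 1], "Test 1 failed"
print("Test 1 passed")
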
def main():
    reader = DbReader()
    plotter = Plotter(reader)

    # LABELS
    labels = list(range(3))
    # labels = list(range(1, 11))

    # BOTH TYPES
    # 3 classes
    train_X, val_X, test_X, train_y, val_y, test_y = reader.get_packed_data()
    # 10 classes
    # train_X, val_X, test_X, train_y, val_y, test_y = reader.get_splitted_data()

    # ONLY RED
    # 3 classes
    # train_X, val_X, test_X, train_y, val_y, test_y = reader.get_red_packed_data()
    # 10 classes
    # train_X, val_X, test_X, train_y, val_y, test_y = reader.get_red_data()

    # ONLY WHITE
    # 3 classes
    # train_X, val_X, test_X, train_y, val_y, test_y = reader.get_white_packed_data()
    # 10 classes
    # train_X, val_X, test_X, train_y, val_y, test_y = reader.get_white_data()

    # scaler = StandardScaler()
    # scaler.fit_transform(train_X, train_y)
    # scaler.transform(val_X, val_y)

    models = [
        LinearRegressionModel(),
        LogisticRegressionModel(),
        SVMModel(),
        RandomForestModel()
    ]
    model_names = [
        'LinRegModel',
        'LogRegModel',
        'SVMModel',
        'RandomForestModel'
    ]

    # test_X, test_y = train_X, train_y

    # Append each model's MAE and MSE on the test set to the stats file
    with open(stats_file_path, "a") as stat_file:
        stat_file.write("Model errors:\n")
        for i, model in enumerate(models):
            model.load()
            mae = model.get_mae(test_X, test_y)
            mse = model.get_mse(test_X, test_y)
            print(
                f"Model name: {model.name:27} MAE: {mae:{6}.{4}} MSE: {mse:{6}.{4}}"
            )
            stat_file.write(f"\t{model_names[i]:{22}}: ")
            stat_file.write(f"MAE = {mae:{8}.{4}} ")
            stat_file.write(f"MSE = {mse:{8}.{4}} ")
            stat_file.write(f"SCORE = {model.score(test_X, test_y)}\n")

    # Plots; the confusion matrix and report use the last model in the list
    plotter.heatmap()
    plotter.kdeplot()
    plotter.pairplot()
    plotter.confusion_matrix(test_y, model.predict(test_X))
    plotter.classification_report(test_y, model.predict(test_X), labels)

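get_mae() and get_mse() are not defined in the listings. Assuming each wrapper exposes its estimator as .model (as the GridSearchCV call in the next script suggests), they reduce to thin shims over scikit-learn's metrics; this is a sketch, not the project's actual code:

from sklearn.metrics import mean_absolute_error, mean_squared_error


def get_mae(self, X, y):
    # Hypothetical helper: MAE of the wrapped model's predictions
    return mean_absolute_error(y, self.model.predict(X))


def get_mse(self, X, y):
    # Hypothetical helper: MSE of the wrapped model's predictions
    return mean_squared_error(y, self.model.predict(X))
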
def main():
    # Suppress warnings
    warnings.simplefilter("ignore")
    warnings.warn("deprecated", DeprecationWarning)

    # INIT
    reader = DbReader()
    plotter = Plotter(reader)

    # BOTH TYPES
    # 3 classes
    train_X, val_X, test_X, train_y, val_y, test_y = reader.get_packed_data()
    # 10 classes
    # train_X, val_X, test_X, train_y, val_y, test_y = reader.get_splitted_data()

    # ONLY RED
    # 3 classes
    # train_X, val_X, test_X, train_y, val_y, test_y = reader.get_red_packed_data()
    # 10 classes
    # train_X, val_X, test_X, train_y, val_y, test_y = reader.get_red_data()

    # ONLY WHITE
    # 3 classes
    # train_X, val_X, test_X, train_y, val_y, test_y = reader.get_white_packed_data()
    # 10 classes
    # train_X, val_X, test_X, train_y, val_y, test_y = reader.get_white_data()

    models = [
        LinearRegressionModel(),
        LogisticRegressionModel(),
        SVMModel(),
        RandomForestModel()
    ]
    model_names = [
        'LinRegModel',
        'LogRegModel',
        'SVMModel',
        'RandomForestModel'
    ]
    # One parameter grid per model; the trailing comments appear to record
    # the values the grid search selected
    params = [
        dict(fit_intercept=[True, False],
             normalize=[True, False]),  # true, 1, false
        dict(tol=[1e-3, 1e-4, 1e-5], C=[1, 10, 20, 30],
             fit_intercept=[True, False],
             warm_start=[True, False]),  # 1e-4, 20, true, true
        dict(C=[2, 5, 10, 20, 30, 50],
             gamma=[0.1, 0.01, 0.0001, 0.00001]),  # gamma: rbf, poly, sigmoid  # 20, 1e-5
        dict(n_estimators=[50, 100, 165, 200, 300, 500, 700],
             max_depth=[10, 20, 33, 40])  # 33, 165
    ]
    best_params = []
    times = []

    # GSCV
    for i, model in enumerate(models):
        clf = GridSearchCV(model.model, params[i], cv=5, refit=False)
        clf.fit(train_X, train_y)
        print(f"{model_names[i]}")
        print(f"Best params: {clf.best_params_}")
        print(f"Best score: {clf.best_score_}")
        print(f"Worst score: {clf.cv_results_['mean_test_score'].min()}\n")
        best_params.append(clf.best_params_)

    # LEARN WITH BEST_PARAMS
    for i, model in enumerate(models):
        model.set_estimator(best_params[i])
        t_start = time()
        model.fit(train_X, train_y)
        t_end = time()
        mae = model.get_mae(val_X, val_y)
        mse = model.get_mse(val_X, val_y)
        times.append(t_end - t_start)
        model.save()
        print(
            f"Model name: {model.name:27} MAE: {mae:{6}.{4}} MSE: {mse:{6}.{4}} t: {times[i]}"
        )

    # SAVE LOGS TO FILE
    with open(stats_file_path, "a") as stat_file:
        stat_file.write("Duration times of model fitting:\n")
        for i, name in enumerate(model_names):
            stat_file.write(f"\t{name:{22}}: {times[i]}\n")
        stat_file.write('\n')
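
set_estimator() is not shown above. Since the grid-search loop hands the wrapped estimator to GridSearchCV as model.model, a plausible one-line implementation (an assumption, not the project's actual code) forwards the winning parameters through scikit-learn's set_params:

def set_estimator(self, best_params):
    # Hypothetical: push the grid-search winners into the wrapped estimator;
    # every scikit-learn estimator accepts them via set_params(**kwargs)
    self.model.set_params(**best_params)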