def trainWithHotEncoding(hot_encoded_train_features, hot_encoded_train_labels,
                         hot_encoded_test_features, hot_encoded_test_labels,
                         results, algorithms, isTesting):
    """Train, save and evaluate a random forest on hot-encoded features.

    Balanced class weights are computed from the flattened training labels
    and handed, together with a parameter grid, to the ``RandomForest``
    wrapper (defined outside this block). The wrapper's ``model`` attribute
    is then dumped with joblib to the relative path ``RF-HOT.model``; a
    failure to save is printed rather than raised. Finally ``drawCurves`` is
    called on the training data and then on the test data.

    Args:
        hot_encoded_train_features: Features passed to ``train`` and to the
            first ``drawCurves`` call.
        hot_encoded_train_labels: Training labels; must provide
            ``.flatten()`` (presumably a NumPy array -- confirm with caller).
        hot_encoded_test_features: Features for the test ``drawCurves`` call.
        hot_encoded_test_labels: Labels for the test ``drawCurves`` call.
        results: Mapping updated in place; the key
            ``"RANDOM FOREST HOT ENCODING"`` receives whatever ``drawCurves``
            returns for the test data.
        algorithms: Mapping updated in place; the same key receives the
            trained wrapper.
        isTesting: When truthy, the parameter grid is replaced by an empty
            dict.

    Returns:
        The tuple ``(rf_hot_encoding, results, algorithms)``.
    """
    train_labels_flat = hot_encoded_train_labels.flatten()

    # `classes` and `y` are passed by keyword: assuming `class_weight` is
    # sklearn.utils.class_weight (import not visible here), recent
    # scikit-learn releases reject them positionally, while older releases
    # accept the keyword form as well.
    class_weights = class_weight.compute_class_weight(
        'balanced',
        classes=np.unique(train_labels_flat),
        y=train_labels_flat,
    )

    param_grid = {
        'max_features': [
            # None,
            # "sqrt",
            "log2",
        ],
        'n_estimators': [
            # 1000,
            2000,
            # 3000
        ],
    }
    if isTesting:
        print("TESTING MODE RF: ONLY TRAINING 1 MODEL")
        param_grid = {}

    rf_hot_encoding = RandomForest(class_weights=class_weights,
                                   param_grid=param_grid)
    rf_hot_encoding.title = "RANDOM FOREST HOT ENCODING TRAIN"
    rf_hot_encoding.train(hot_encoded_train_features, hot_encoded_train_labels)

    model_name = 'RF-HOT.model'
    print("SAVING MODEL: ", model_name)
    try:
        joblib.dump(rf_hot_encoding.model, model_name)
    except Exception as e:
        # Saving is best effort: report the problem and keep going so the
        # evaluation below still runs.
        print("Cannot save {} because: \n\n".format(model_name), str(e))

    algorithms["RANDOM FOREST HOT ENCODING"] = rf_hot_encoding

    # Curves for the training data; the return value is not kept.
    rf_hot_encoding.drawCurves(X=hot_encoded_train_features,
                               y=hot_encoded_train_labels)

    # The title is switched before the test curves are drawn.
    rf_hot_encoding.title = "RANDOM FOREST HOT ENCODING TEST"
    results["RANDOM FOREST HOT ENCODING"] = rf_hot_encoding.drawCurves(
        X=hot_encoded_test_features, y=hot_encoded_test_labels)

    return rf_hot_encoding, results, algorithms
def trainWithFrecuencies(tetra_freq_train_features, tetra_freq_train_labels,
                         tetra_freq_test_features, tetra_freq_test_labels,
                         results, algorithms, isTesting):
    """Train, save and evaluate a random forest on tetra-nucleotide frequencies.

    Balanced class weights are computed from the flattened training labels
    and handed, together with a parameter grid, to the ``RandomForest``
    wrapper (defined outside this block). The wrapper's ``model`` attribute
    is then dumped with joblib to the relative path ``RF-TETRA.model``; a
    failure to save is printed rather than raised. Finally ``drawCurves`` is
    called on the training data and then on the test data.

    Args:
        tetra_freq_train_features: Features passed to ``train`` and to the
            first ``drawCurves`` call.
        tetra_freq_train_labels: Training labels; must provide
            ``.flatten()`` (presumably a NumPy array -- confirm with caller).
        tetra_freq_test_features: Features for the test ``drawCurves`` call.
        tetra_freq_test_labels: Labels for the test ``drawCurves`` call.
        results: Mapping updated in place; the key ``"RANDOM FOREST"``
            receives whatever ``drawCurves`` returns for the test data.
        algorithms: Mapping updated in place; the same key receives the
            trained wrapper.
        isTesting: When truthy, the parameter grid is replaced by an empty
            dict.

    Returns:
        The tuple ``(rf, results, algorithms)``.
    """
    train_labels_flat = tetra_freq_train_labels.flatten()

    # `classes` and `y` are passed by keyword: assuming `class_weight` is
    # sklearn.utils.class_weight (import not visible here), recent
    # scikit-learn releases reject them positionally, while older releases
    # accept the keyword form as well.
    class_weights = class_weight.compute_class_weight(
        'balanced',
        classes=np.unique(train_labels_flat),
        y=train_labels_flat,
    )

    param_grid = {
        'max_features': [
            # None,
            # "sqrt",
            "log2",
        ],
        'n_estimators': [
            # 1000,
            2000,
            # 3000
        ],
    }
    if isTesting:
        print("TESTING MODE RF: ONLY TRAINING 1 MODEL")
        param_grid = {}

    rf = RandomForest(class_weights=class_weights, param_grid=param_grid)
    rf.title = "RANDOM FOREST TETRA NUCLEOTIDE FREQUENCY TRAIN"
    rf.train(tetra_freq_train_features, tetra_freq_train_labels)

    model_name = 'RF-TETRA.model'
    print("SAVING MODEL USING JOBLIB: ", model_name)
    try:
        joblib.dump(rf.model, model_name)
    except Exception as e:
        # Saving is best effort: report the problem and keep going so the
        # evaluation below still runs.
        print("Cannot save {} because: \n\n".format(model_name), str(e))

    algorithms["RANDOM FOREST"] = rf

    # Curves for the training data; the return value is not kept.
    rf.drawCurves(X=tetra_freq_train_features, y=tetra_freq_train_labels)

    # The title is switched before the test curves are drawn.
    rf.title = "RANDOM FOREST TETRA NUCLEOTIDE FREQUENCY TEST"
    results["RANDOM FOREST"] = rf.drawCurves(X=tetra_freq_test_features,
                                             y=tetra_freq_test_labels)

    return rf, results, algorithms