def test_create_final_reports_for_an_LR_strategy_with_id_78_and_2_saved_reports(self):
    mock_object = model_developement.Model_Selection()
    mock_dir_name = "saved_reports78"
    os.mkdir(mock_dir_name)
    mock_df1 = pd.DataFrame(data=np.array([[1, 2], [3, 4]]),
                            columns=["attr1", "attr2"])
    mock_df2 = pd.DataFrame(data=np.array([[5, 6], [7, 8]]),
                            columns=["attr1", "attr2"])
    mock_df1.to_csv(os.path.join(mock_dir_name, "mock_csv1.csv"), index=False)
    mock_df2.to_csv(os.path.join(mock_dir_name, "mock_csv2.csv"), index=False)
    expected = {"len": 4, "cols": 2, "value_1:0": 3, "value_3:1": 8}

    result = mock_object.create_final_reports()

    # The final report should concatenate both saved reports into a single CSV.
    mock_final_rep_path = os.path.join("final_reports", "report_78.csv")
    self.assertIn("report_78.csv", os.listdir("final_reports"))
    final_report = pd.read_csv(mock_final_rep_path)
    self.assertEqual(len(final_report), expected["len"])
    self.assertEqual(len(final_report.columns), expected["cols"])
    self.assertEqual(final_report.iloc[1, 0], expected["value_1:0"])
    self.assertEqual(final_report.iloc[3, 1], expected["value_3:1"])
    self.assertIsInstance(result, dict)

    shutil.rmtree("saved_reports78")
    shutil.rmtree("final_reports")
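# For orientation, the behaviour exercised above could be implemented roughly
# as in the sketch below. This is a hedged illustration inferred from the
# assertions, not model_developement's actual create_final_reports: it merges
# every CSV inside each saved_reports<id> directory into
# final_reports/report_<id>.csv and returns the merged frames keyed by id.
import glob
import os

import pandas as pd


def create_final_reports_sketch():
    """Illustrative stand-in for Model_Selection.create_final_reports."""
    os.makedirs("final_reports", exist_ok=True)
    final_reports = {}
    for report_dir in glob.glob("saved_reports*"):
        strategy_id = report_dir.replace("saved_reports", "")
        # Concatenate the per-run reports of one LR strategy, in name order.
        dfs = [pd.read_csv(path)
               for path in sorted(glob.glob(os.path.join(report_dir, "*.csv")))]
        merged = pd.concat(dfs, ignore_index=True)
        merged.to_csv(os.path.join("final_reports", f"report_{strategy_id}.csv"),
                      index=False)
        final_reports[strategy_id] = merged
    return final_reports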
def test_get_best_model_characteristics(self):
    mock_object = model_developement.Model_Selection()
    mock_dirs = ["saved_reports78", "saved_reports98"]
    for given_dir in mock_dirs:
        os.mkdir(given_dir)

    # Create 3 mock reports; the 3rd one contains the best model
    # (lowest val_loss).
    mock_df1 = pd.DataFrame(columns=["models", "val_loss", "attr1"])
    mock_df1["models"] = np.arange(1, 11, 1)
    mock_df1["val_loss"] = np.ones(10)
    mock_df1["attr1"] = np.random.rand(10)
    mock_df1.iloc[4, 1] = 0.5

    mock_df2 = pd.DataFrame(columns=["models", "val_loss", "attr1"])
    mock_df2["models"] = np.arange(1, 11, 1)
    mock_df2["val_loss"] = np.ones(10)
    mock_df2["attr1"] = np.random.rand(10)
    mock_df2.iloc[2, 1] = 0.2

    mock_df3 = pd.DataFrame(columns=["models", "val_loss", "attr1"])
    mock_df3["models"] = np.arange(1, 11, 1)
    mock_df3["val_loss"] = np.ones(10)
    mock_df3["attr1"] = np.random.rand(10)
    mock_df3.iloc[2, 1] = 0.1

    # Use the mock reports to fill the 2 mock dirs; each one corresponds
    # to one LR strategy.
    mock_df1.to_csv(os.path.join(mock_dirs[0], "mock_csv1.csv"), index=False)
    mock_df2.to_csv(os.path.join(mock_dirs[0], "mock_csv2.csv"), index=False)
    mock_df1.to_csv(os.path.join(mock_dirs[1], "mock_csv1.csv"), index=False)
    mock_df3.to_csv(os.path.join(mock_dirs[1], "mock_csv2.csv"), index=False)

    expected = {"best_mod": 3, "best_perform": 0.1, "best_lr": "98"}
    best_model_char = mock_object.get_best_model_characteristics()
    self.assertEqual(best_model_char["best_performance"],
                     expected["best_perform"])
    self.assertEqual(best_model_char["best_model"], expected["best_mod"])
    self.assertEqual(best_model_char["best_lr_strategy"], expected["best_lr"])

    shutil.rmtree("saved_reports78")
    shutil.rmtree("saved_reports98")
    shutil.rmtree("final_reports")
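# A minimal sketch of what get_best_model_characteristics plausibly does,
# inferred from the test above and offered only as an assumption: once the
# per-strategy final reports exist (the test deletes final_reports afterwards,
# so the method presumably builds them first, e.g. via create_final_reports),
# scan each report, take the row with the lowest val_loss, and return the
# model id, its loss, and the winning strategy's suffix.
import glob
import os

import pandas as pd


def get_best_model_characteristics_sketch():
    """Illustrative stand-in for Model_Selection.get_best_model_characteristics."""
    best = {"best_model": None,
            "best_performance": float("inf"),
            "best_lr_strategy": None}
    for path in glob.glob(os.path.join("final_reports", "report_*.csv")):
        report = pd.read_csv(path)
        idx = report["val_loss"].idxmin()
        if report.loc[idx, "val_loss"] < best["best_performance"]:
            best["best_performance"] = report.loc[idx, "val_loss"]
            best["best_model"] = report.loc[idx, "models"]
            # "report_98.csv" -> "98"
            best["best_lr_strategy"] = (os.path.basename(path)
                                        .replace("report_", "")
                                        .replace(".csv", ""))
    return best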
def test_multi_csv_to_dfs_for_2_csv_files(self):
    mock_object = model_developement.Model_Selection()
    mock_csvs_dir = "mock_csv_files"
    os.mkdir(mock_csvs_dir)
    mock_df1 = pd.DataFrame(data=np.array([[1, 2], [3, 4]]),
                            columns=["attr1", "attr2"])
    mock_df2 = pd.DataFrame(data=np.array([[5, 6], [7, 8]]),
                            columns=["attr1", "attr2"])
    mock_df1.to_csv(os.path.join(mock_csvs_dir, "mock_csv1.csv"), index=False)
    mock_df2.to_csv(os.path.join(mock_csvs_dir, "mock_csv2.csv"), index=False)

    dfs = mock_object.multi_csv_to_dfs(given_dir=mock_csvs_dir)
    self.assertIsInstance(dfs, list)
    self.assertIsInstance(dfs[0], pd.DataFrame)
    # Note: all(dfs[0] == mock_df1) would only iterate over the (always
    # truthy) column labels; compare the actual contents instead.
    pd.testing.assert_frame_equal(dfs[0], mock_df1)

    shutil.rmtree("mock_csv_files")
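# multi_csv_to_dfs, as exercised above, presumably just loads every CSV in a
# directory into a list of DataFrames. A minimal sketch under that assumption
# (the sorted() order is a guess that matches the test's expectations):
import os

import pandas as pd


def multi_csv_to_dfs_sketch(given_dir):
    """Illustrative stand-in for Model_Selection.multi_csv_to_dfs."""
    return [pd.read_csv(os.path.join(given_dir, name))
            for name in sorted(os.listdir(given_dir))
            if name.endswith(".csv")]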
def test_create_final_dir_when_dir_exists_in_advance(self):
    mock_object = model_developement.Model_Selection()
    os.mkdir("final_reports")
    mock_object.create_final_dir()
    self.assertIn("final_reports", os.listdir())
    shutil.rmtree("final_reports")
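# The test above requires create_final_dir to tolerate a pre-existing
# directory. One common way to get that behaviour (an assumption, not
# necessarily the project's code) is os.makedirs with exist_ok=True:
import os


def create_final_dir_sketch():
    """Illustrative stand-in for Model_Selection.create_final_dir."""
    os.makedirs("final_reports", exist_ok=True)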
    given_data=scaled_test_data)

""" Learning rate strategy """
learning_rate = learning_rate_strategy.Learning_Rate_Strategy()
learning_rate.plot_lr_strategy(show=True)
learning_rate.create_lr_log(strategy_index=1, save=True)
learning_rate_dict = learning_rate_strategy.create_lr_dict()

""" Model development """
# Train one batch of models per learning rate strategy; the dict key also
# customises the output folders (e.g. saved_reports<key>).
for key in learning_rate_dict:
    model_dev = model_developement.Model_Development(
        input_data=sliding_window_train_data,
        folder_customisation=key)
    models_dict = model_dev.create_models_dict()
    model_dev.train_models(given_models=models_dict,
                           training_targets=sliding_window_train_target,
                           lr_callback=learning_rate_dict[key])

""" Best Model Prediction """
model_selection = model_developement.Model_Selection()
best_model_char = model_selection.get_best_model_characteristics()
# Load the best-performing model from the winning LR strategy's folder.
best_model = load_model(
    os.path.join(os.getcwd(),
                 "saved_models" + best_model_char["best_lr_strategy"],
                 f"model_{best_model_char['best_model']}.h5"))
prediction = best_model.predict([sliding_window_test_data])

""" Final evaluation """
mse = MeanSquaredError()
mae = MeanAbsoluteError()
rmse_eval = tf.sqrt(mse(sliding_window_test_target, prediction)).numpy()
mse_eval = mse(sliding_window_test_target, prediction).numpy()
mae_eval = mae(sliding_window_test_target, prediction).numpy()
print(f"RMSE = {rmse_eval}", f"MSE = {mse_eval}", f"MAE = {mae_eval}")
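# The training loop above assumes create_lr_dict maps each folder suffix to a
# Keras LearningRateScheduler callback. The schedules below are purely
# illustrative placeholders for what such a dict might contain, not the
# project's actual strategies:
import tensorflow as tf


def create_lr_dict_sketch():
    """Illustrative stand-in for learning_rate_strategy.create_lr_dict."""
    return {
        # hypothetical strategy "78": exponential decay from 1e-3
        "78": tf.keras.callbacks.LearningRateScheduler(
            lambda epoch, lr: 1e-3 * 0.95 ** epoch),
        # hypothetical strategy "98": step drop after epoch 10
        "98": tf.keras.callbacks.LearningRateScheduler(
            lambda epoch, lr: 1e-3 if epoch < 10 else 1e-4),
    }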