def test_experiment_not_transformed_test(self):
    """Check that the test-split R2 is 0 when FakePredictionModel is used.

    Loads ``../Datasets/HIV_37_Samples/MergedDataset.csv``, splits it with
    ``test_split=0.19`` and ``train_split=0.62``, runs an ``Experiment``
    with a ``FakePredictionModel`` and asserts that
    ``exp.get_r2(SplitTypes.Test)`` equals 0.
    """
    file_path = "../Datasets/HIV_37_Samples/MergedDataset.csv"
    loaded_data = FileLoader.load_file(file_path)

    data_manager = DataManager()
    data_manager.set_data(loaded_data)
    data_manager.split_data(test_split=0.19, train_split=0.62)

    learning_model = FakePredictionModel()
    exp = Experiment(data_manager, learning_model)
    exp.run_experiment()

    # assertEqual is used instead of the deprecated assertEquals alias.
    self.assertEqual(0, exp.get_r2(SplitTypes.Test))
def test_experiment(self):
    """Run an experiment per algorithm combination and log the outcome.

    Reads combinations from ``../Datasets/test.csv``; columns 0, 1 and 2 of
    each row are passed to ``self.getnormalizer``,
    ``self.getfeature_eliminator`` and ``self.get_model`` respectively.
    For each of the first 80 rows an ``Experiment`` is run on
    ``../Datasets/HIV_37_Samples/MergedDataset.csv`` and one row is written
    to each of two output files:

    * the "header" file receives only the three component class names;
    * the results file additionally receives ``exp.fitness_matrix[0]`` and
      the train / valid / test R2 values.

    This method makes no assertions; it fails only if an exception is raised.
    """
    output_filename_header = FileLoader.create_output_file()
    # NOTE(review): presumably this pause lets the second output file get a
    # different (timestamp-based?) name -- confirm against
    # FileLoader.create_output_file.
    time.sleep(1)

    loaded_algorithm_combinations = FileLoader.read_csv_file("../Datasets/test.csv")
    file_path = "../Datasets/HIV_37_Samples/MergedDataset.csv"
    loaded_data = FileLoader.load_file(file_path)
    print(loaded_algorithm_combinations[0])

    output_filename = FileLoader.create_output_file()

    # NOTE(review): the hard-coded bound requires at least 80 rows in the CSV.
    for i in range(0, 80):
        combination = loaded_algorithm_combinations[i]
        normalizer = self.getnormalizer(combination[0])
        feature_eliminator = self.getfeature_eliminator(combination[1])
        the_model = self.get_model(combination[2])

        normalizer_name = type(normalizer).__name__
        eliminator_name = type(feature_eliminator).__name__
        model_name = type(the_model).__name__

        # A single pre-joined argument prints the same text under both the
        # Python 2 print statement and the Python 3 print function.
        print(
            " ".join(
                [
                    "taking ",
                    normalizer_name,
                    "and feature selector ",
                    eliminator_name,
                    "model",
                    model_name,
                ]
            )
        )

        # Record the combination before running it, with empty result fields.
        FileLoader.write_model_in_file(
            output_filename_header,
            normalizer_name,
            eliminator_name,
            model_name,
            "",
            "",
            "",
            "",
            "",
        )

        the_data_manager = DataManager(feature_eliminator, normalizer=normalizer)
        the_data_manager.set_data(loaded_data)
        the_data_manager.split_data(test_split=0.15, train_split=0.70)

        exp = Experiment(the_data_manager, the_model)
        exp.run_experiment()

        FileLoader.write_model_in_file(
            output_filename,
            normalizer_name,
            eliminator_name,
            model_name,
            "",
            exp.fitness_matrix[0],
            exp.get_r2(SplitTypes.Train),
            exp.get_r2(SplitTypes.Valid),
            exp.get_r2(SplitTypes.Test),
        )
def test_experiment_sum_of_squares_zeros_test(self):
    """Check that the test-split sum of squares is 0 for all-ones data.

    Feeds a ``(37, 397)`` array of ones through ``DataManager`` and an
    ``svm.SVR`` experiment, then asserts that
    ``exp.get_sum_of_squares(SplitTypes.Test)`` equals 0.
    """
    the_data_manager = DataManager()
    an_array_of_all_ones = np.ones((37, 397))
    the_model = svm.SVR()

    the_data_manager.set_data(an_array_of_all_ones)
    the_data_manager.split_data(test_split=0.19, train_split=0.62)

    exp = Experiment(the_data_manager, the_model)
    exp.run_experiment()

    sum_of_squares_test = exp.get_sum_of_squares(SplitTypes.Test)
    expected = 0
    # assertEqual is used instead of the deprecated assertEquals alias.
    self.assertEqual(expected, sum_of_squares_test)
def test_experiment_svm_svr_37dataset_r2_train(self):
    """Check the train-split R2 of an ``svm.SVR`` on the HIV_37_Samples file.

    Loads ``../Datasets/HIV_37_Samples/MergedDataset.csv``, splits it with
    ``test_split=0.19`` and ``train_split=0.62``, runs the experiment and
    compares ``exp.get_r2(SplitTypes.Train)`` with the recorded value.
    """
    file_path = "../Datasets/HIV_37_Samples/MergedDataset.csv"
    loaded_data = FileLoader.load_file(file_path)

    the_data_manager = DataManager()
    the_data_manager.set_data(loaded_data)
    the_data_manager.split_data(test_split=0.19, train_split=0.62)

    the_model = svm.SVR()
    exp = Experiment(the_data_manager, the_model)
    exp.run_experiment()

    r2_train = exp.get_r2(SplitTypes.Train)
    expected_svm_r2_value = 0.93994377385638073
    # A computed float is compared approximately (7 decimal places by
    # default) so last-bit rounding differences do not fail the test.
    self.assertAlmostEqual(r2_train, expected_svm_r2_value)
def test_experiment_all_zeros_r2_1(self):
    """Check that an SVR experiment on all-zero data reports a train R2 of 1.0.

    A ``(37, 397)`` array of zeros is split with ``test_split=0.19`` and
    ``train_split=0.62`` before the experiment is run.
    """
    zero_matrix = np.zeros((37, 397))

    manager = DataManager()
    manager.set_data(zero_matrix)
    manager.split_data(test_split=0.19, train_split=0.62)

    experiment = Experiment(manager, svm.SVR())
    experiment.run_experiment()

    self.assertEqual(experiment.get_r2(SplitTypes.Train), 1.0)
def test_experiment_svr_37dataset_r2_test(self):
    """Check the test-split R2 of an ``svm.SVR`` on the HIV_37_Samples file.

    Loads ``../Datasets/HIV_37_Samples/MergedDataset.csv``, splits it with
    ``test_split=0.19`` and ``train_split=0.62``, runs the experiment and
    compares ``exp.get_r2(SplitTypes.Test)`` with the recorded value.
    """
    file_path = "../Datasets/HIV_37_Samples/MergedDataset.csv"
    loaded_data = FileLoader.load_file(file_path)

    the_data_manager = DataManager()
    the_data_manager.set_data(loaded_data)
    the_data_manager.split_data(test_split=0.19, train_split=0.62)

    the_model = svm.SVR()
    exp = Experiment(the_data_manager, the_model)
    exp.run_experiment()

    r2_test = exp.get_r2(SplitTypes.Test)
    expected_svm_r2_value = -0.33005242525900247
    # A computed float is compared approximately (7 decimal places by
    # default) so last-bit rounding differences do not fail the test.
    self.assertAlmostEqual(r2_test, expected_svm_r2_value)
def test_split_merge_csv_4_25_8(self):
    """Check the split shapes for ``test_split=0.11``, ``train_split=0.22``.

    After splitting ``../Datasets/HIV_37_Samples/MergedDataset.csv`` the
    test, valid and train entries of ``data_manager.datum`` are expected to
    have shapes ``(4, 397)``, ``(25, 397)`` and ``(8, 397)``.
    """
    file_loader = FileLoader()
    data_manager = DataManager()
    file_path = "../Datasets/HIV_37_Samples/MergedDataset.csv"

    loaded_data = file_loader.load_file(file_path)
    data_manager.set_data(loaded_data)
    data_manager.split_data(test_split=0.11, train_split=0.22)

    # Shape tuples are written directly; allocating zero arrays only to
    # read their .shape is unnecessary.
    test_shapes = (4, 397)
    valid_shapes = (25, 397)
    train_shapes = (8, 397)
    expected = np.array([test_shapes, valid_shapes, train_shapes])

    actual = np.array([
        data_manager.datum[SplitTypes.Test].shape,
        data_manager.datum[SplitTypes.Valid].shape,
        data_manager.datum[SplitTypes.Train].shape,
    ])
    self.assertTrue(np.array_equal(actual, expected))
def test_experiment_sum_of_squares_real37_test(self):
    """Check the test-split sum of squares of an SVR on the HIV_37_Samples file.

    The value returned by ``get_sum_of_squares(SplitTypes.Test)`` is
    compared approximately with the recorded value 6.708898437500002.
    """
    dataset = FileLoader.load_file("../Datasets/HIV_37_Samples/MergedDataset.csv")

    manager = DataManager()
    manager.set_data(dataset)
    regressor = svm.SVR()
    manager.split_data(test_split=0.19, train_split=0.62)

    experiment = Experiment(manager, regressor)
    experiment.run_experiment()

    self.assertAlmostEqual(
        6.708898437500002,
        experiment.get_sum_of_squares(SplitTypes.Test),
    )
def test_split_merge_csv_7_7_23(self):
    """Check the split shapes for ``test_split=0.19``, ``train_split=0.62``.

    The test and valid entries of ``datum`` are expected to have shape
    ``(7, 397)`` and the train entry ``(23, 397)``.
    """
    loader = FileLoader()
    manager = DataManager()
    dataset = loader.load_file("../Datasets/HIV_37_Samples/MergedDataset.csv")
    manager.set_data(dataset)
    manager.split_data(test_split=0.19, train_split=0.62)

    small_split_shape = (7, 397)
    expected_shapes = np.array([small_split_shape, small_split_shape, (23, 397)])

    split_order = (SplitTypes.Test, SplitTypes.Valid, SplitTypes.Train)
    observed_shapes = np.array([manager.datum[split].shape for split in split_order])

    self.assertTrue(np.array_equal(observed_shapes, expected_shapes))
def test_split_into_target_and_input(self):
    """Check input and target shapes after an 0.11 / 0.22 test/train split.

    For the test, valid and train splits, ``data_manager.inputs`` is
    expected to have shapes ``(4, 396)``, ``(25, 396)``, ``(8, 396)`` and
    ``data_manager.targets`` shapes ``(4,)``, ``(25,)``, ``(8,)``.
    """
    file_loader = FileLoader()
    data_manager = DataManager()
    file_path = "../Datasets/HIV_37_Samples/MergedDataset.csv"

    loaded_data = file_loader.load_file(file_path)
    data_manager.set_data(loaded_data)
    data_manager.split_data(test_split=0.11, train_split=0.22)

    split_order = (SplitTypes.Test, SplitTypes.Valid, SplitTypes.Train)

    # The shapes mix 2-D and 1-D tuples. Wrapping them in np.array builds a
    # ragged array, which newer NumPy releases reject with ValueError, so
    # plain lists of tuples are compared instead.
    expected = [
        (4, 396), (25, 396), (8, 396),  # inputs: test, valid, train
        (4,), (25,), (8,),              # targets: test, valid, train
    ]
    actual = (
        [tuple(data_manager.inputs[split].shape) for split in split_order]
        + [tuple(data_manager.targets[split].shape) for split in split_order]
    )
    self.assertEqual(expected, actual)
from FileLoader import FileLoader
from DataManager import DataManager
from src.Population import Population

# Load the drug dataset and prepare a 15% test / 70% train split without
# normalization.
file_path = "../Dataset/00-91-Drugs-All-In-One-File.csv"
loaded_data = FileLoader.load_file(file_path)

data_manager = DataManager(normalizer=None)
data_manager.set_data(loaded_data)
data_manager.split_data(test_split=0.15, train_split=0.70)

# NOTE(review): neither loaded_data nor data_manager is passed to Population
# in this script; Population loads its data through load_data().
population = Population()
population.load_data()

# Print the sum of population rows 1 through 49 (row 0 is not printed).
for i in range(1, 50):
    print("row", i, population.data[i].sum())
# for k_value in range(13, 14): for the_model in the_models: print( "taking care of ", type(normalizer).__name__, "and feature selector ", type(feature_eliminator).__name__, "model", type(the_model).__name__, ) file_path = "../Datasets/HIV_37_Samples/MergedDataset.csv" loaded_data = FileLoader.load_file(file_path) # feature_eliminator = SelectKBest(f_regression,k=k_value) the_data_manager = DataManager(feature_eliminator, normalizer=normalizer) the_data_manager.set_data(loaded_data) the_data_manager.split_data(test_split=0.15, train_split=0.70) exp = Experiment(the_data_manager, the_model) exp.run_experiment() arr_selected = feature_eliminator.get_support(indices=True) if ( exp.get_r2(SplitTypes.Train) > 0 and exp.get_r2(SplitTypes.Valid) > 0 and exp.get_r2(SplitTypes.Test) > 0 ): print( feature_eliminator.get_support(indices=True), type(normalizer).__name__,