def test_should_contain_same_data(self):
    """Compare ``prepare_raw_data`` with the legacy first-two-rows loader.

    Both loaders are configured for dataset number 12. The two sample
    collections must have the same length, and the first element of each
    must hold exactly two values.
    """
    # given
    data = ClassifierData(are_samples_generated=False,
                          filename='datasets.xlsx',
                          number_of_dataset_if_not_generated=12)
    # when
    # Only the samples are inspected; the labels are discarded.
    X1, _ = ClassifLibrary.prepare_raw_data(data)
    X2, _ = ClassifLibraryOld.load_samples_from_datasets_first_two_rows(
        classifier_data=ClassifierData(
            number_of_dataset_if_not_generated=12))
    # then
    # assertEqual reports both lengths on failure, unlike assertTrue(a == b).
    self.assertEqual(len(X2), len(X1))
    self.assertEqual(len(X1[0]), 2)
    self.assertEqual(len(X2[0]), 2)
def test_should_return_right_number_of_subplots_when_external_plots_drawn(
        self):
    """Check the subplot count when ``show_color_plot`` is enabled.

    The expected count is twice ``self.NUMBER_OF_CLASSIFIERS`` plus one.
    """
    # given
    classifier_data = ClassifierData(show_color_plot=True)
    # when
    number_of_subplots = ClassifLibrary.determine_number_of_subplots(
        classifier_data)
    # then
    expected_subplots = self.NUMBER_OF_CLASSIFIERS * 2 + 1
    self.assertEqual(expected_subplots, number_of_subplots)
def test_should_return_one_permutation(self):
    """Expect a single permutation, ``(0, 1)``, when not generating all.

    ``generate_all_permutations`` is switched off in the configuration
    passed to ``generate_permutations``.
    """
    # given
    classifier_data = ClassifierData(generate_all_permutations=False)
    # when
    permutations = ClassifLibrary.generate_permutations(classifier_data)
    # then
    self.assertEqual(1, len(permutations))
    first_permutation = permutations[0]
    self.assertEqual((0, 1), first_permutation)
def test_should_return_right_permutations(self):
    """Check how many permutations are produced for ten classifiers.

    For ``n`` classifiers the test expects ``(n + 2) * (n + 1)``
    permutations.
    """
    # given
    number_of_classifiers = 10
    expected_count = (number_of_classifiers + 2) * (number_of_classifiers + 1)
    classifier_data = ClassifierData(
        number_of_classifiers=number_of_classifiers)
    # when
    permutations = ClassifLibrary.generate_permutations(classifier_data)
    # then
    # Materialise first: the result is wrapped in list() before counting.
    produced = list(permutations)
    self.assertEqual(len(produced), expected_count)
def test_should_not_change_data(self):
    """Check that dataset 12's samples all appear in the test file's data.

    The first-two-rows loader must not return more samples than the file
    loader reads from ``self.TEST_FILENAME``, and every sample it returns
    must be contained in the file loader's result.
    """
    # given
    # when
    # Only the samples are compared; the labels are discarded.
    X1, _ = ClassifLibraryOld.load_samples_from_file_non_parametrized(
        self.TEST_FILENAME)
    X2, _ = ClassifLibraryOld.load_samples_from_datasets_first_two_rows(
        ClassifierData(number_of_dataset_if_not_generated=12))
    # then
    # The dedicated assertions print the offending values on failure.
    self.assertLessEqual(len(X2), len(X1))
    for sample in X2:
        self.assertIn(sample, X1)
def test_should_return_2_best_from_3_coefficients(self):
    """Check which coefficients survive reduction in subspace ``j = 0``.

    With scores 3, 1 and 2 in column 0, the test expects the coefficient
    pairs scored 3 and 2 (``[1, 2]`` and ``[5, 6]``) to be kept. A default
    ``ClassifierData()`` is used.
    """
    # given
    coefficients = [[1, 2], [3, 4], [5, 6]]
    scores = [[3], [1], [2]]
    j = 0
    classifier_data = ClassifierData()
    # when
    filtered_coefficients = ClassifLibrary.reduce_coefficients_in_subspace(
        coefficients, scores, j, classifier_data)
    # then
    # assertIn shows the member and the container when the check fails.
    self.assertIn([1, 2], filtered_coefficients)
    self.assertIn([5, 6], filtered_coefficients)
def test_should_select_right_features_when_swapped(self):
    """Check ``make_selection`` with ``switch_columns_while_loading=True``.

    The expected outputs are the first two values of each input row,
    grouped by label: rows labelled 0 first, rows labelled 1 second.
    """
    # given
    classifier_data = ClassifierData(switch_columns_while_loading=True)
    samples = [
        [0, 5, 10],
        [1, 0, 10],
        [2, 6, 10],
        [3, -1, 10],
        [4, 4, 10],
    ]
    labels = [1, 0, 1, 0, 1]
    expected_label_0 = [[1, 0], [3, -1]]
    expected_label_1 = [[0, 5], [2, 6], [4, 4]]
    # when
    selected_0, selected_1 = ClassifLibrary.make_selection(
        samples, labels, classifier_data)
    # then
    self.assertEqual(expected_label_0, selected_0)
    self.assertEqual(expected_label_1, selected_1)
def test_should_contain_same_data_given_columns(self):
    """Compare ``prepare_raw_data`` with the legacy non-parametrised loader.

    The two sample collections must have the same length, and the first
    element of each must hold exactly two values.
    """
    # given
    data = ClassifierData(are_samples_generated=False,
                          filename='datasets.xlsx')
    # when
    # Only the samples are inspected; the labels are discarded.
    X1, _ = ClassifLibrary.prepare_raw_data(data)
    X2, _ = ClassifLibraryOld.load_samples_from_datasets_non_parametrised()
    # then
    # assertEqual reports both lengths on failure, unlike assertTrue(a == b).
    self.assertEqual(len(X2), len(X1))
    self.assertEqual(len(X1[0]), 2)
    self.assertEqual(len(X2[0]), 2)
def test_should_not_change_data_whole(self):
    """Compare ``prepare_raw_data`` output with the legacy file loader.

    The prepared data for dataset 12 must not contain more samples than
    the data read from ``self.TEST_FILENAME``, and the first element of
    each collection must hold exactly two values.
    """
    # given
    data = ClassifierData(are_samples_generated=False,
                          filename='datasets.xlsx',
                          number_of_dataset_if_not_generated=12)
    # when
    # Only the samples are inspected; the labels are discarded.
    X1, _ = ClassifLibraryOld.load_samples_from_file_non_parametrized(
        self.TEST_FILENAME)
    X2, _ = ClassifLibrary.prepare_raw_data(data)
    # then
    # assertLessEqual prints both lengths when the comparison fails.
    self.assertLessEqual(len(X2), len(X1))
    self.assertEqual(len(X1[0]), 2)
    self.assertEqual(len(X2[0]), 2)
def test_should_evaluate_weighted_average_coefficients_from_n_best(self):
    """Check the score-weighted average over the two best coefficients.

    The expectation is built from the entries at indices 2 and 3 (scores
    0.5 and 0.75, the two highest in column 0): each coefficient is
    weighted by its score and the sum is divided by the sum of those scores.
    """
    # given
    coefficients = [[1, 2], [3, 4], [5, 6], [7, 8]]
    scores = [[0], [0.25], [0.5], [0.75]]
    classifier_data = ClassifierData(number_of_best_classifiers=2,
                                     number_of_classifiers=len(scores))
    # when
    a, b = \
        ClassifLibrary.evaluate_weighted_average_coefficients_from_n_best(
            coefficients, scores, 0, classifier_data)
    # then
    weight_sum = scores[2][0] + scores[3][0]
    expected_a = (coefficients[2][0] * scores[2][0]
                  + coefficients[3][0] * scores[3][0]) / weight_sum
    expected_b = (coefficients[2][1] * scores[2][0]
                  + coefficients[3][1] * scores[3][0]) / weight_sum
    # The expected values (6.2 and 7.2) are not exactly representable as
    # floats, so an equivalent formula evaluated in another order may differ
    # in the last bits. Compare with a tolerance rather than exact equality.
    self.assertAlmostEqual(expected_a, a)
    self.assertAlmostEqual(expected_b, b)
def test_should_evaluate_average_coefficients_from_n_best(self):
    """Check the plain average over the three best coefficients.

    The expectation is the arithmetic mean of the entries at indices 2, 3
    and 4 (scores 0.5, 0.75 and 1, the three highest in column 0).
    """
    # given
    coefficients = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]]
    scores = [[0.25], [0], [0.5], [0.75], [1]]
    classifier_data = ClassifierData(number_of_best_classifiers=3,
                                     number_of_classifiers=len(scores))
    # when
    a, b = ClassifLibraryOld.evaluate_average_coefficients_from_n_best(
        coefficients, scores, 0, classifier_data)
    # then
    best_indices = (2, 3, 4)
    expected_a = sum(coefficients[i][0] for i in best_indices) / 3
    expected_b = sum(coefficients[i][1] for i in best_indices) / 3
    self.assertEqual(expected_a, a)
    self.assertEqual(expected_b, b)
def test_should_return_3_best_from_5_coefficients(self):
    """Check which coefficients survive reduction in subspace ``j = 1``.

    Column 1 of ``scores`` holds 0, 4, 2, 5 and 0. With three best
    classifiers requested out of five, the test expects the coefficient
    pairs scored 4, 2 and 5 to be kept.
    """
    # given
    coefficients = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]]
    scores = [[3, 0], [1, 4], [2, 2], [4, 5], [5, 0]]
    number_of_classifiers = 5
    number_of_best_classifiers = 3
    j = 1
    classifier_data = ClassifierData(
        number_of_classifiers=number_of_classifiers,
        number_of_best_classifiers=number_of_best_classifiers)
    # when
    filtered_coefficients = ClassifLibrary.reduce_coefficients_in_subspace(
        coefficients, scores, j, classifier_data)
    # then
    # assertIn shows the member and the container when the check fails.
    for expected in ([3, 4], [5, 6], [7, 8]):
        self.assertIn(expected, filtered_coefficients)
def test_should_return_right_dataset_permutation(self):
    """Check how ``get_permutation`` splits the data for the tuple (1, 3).

    The element at the tuple's first index must come back as the
    validation part and the element at its second index as the test part.
    Every remaining element must be found in the training part.
    """
    # given
    X = [[0], [1], [2], [3], [4]]
    y = [[5], [6], [7], [8], [9]]
    tup = (1, 3)
    # when
    (X_whole_train, y_whole_train,
     X_validation, y_validation,
     X_test, y_test) = ClassifLibrary.get_permutation(
        X, y, tup, ClassifierData())
    # then
    validation_index, test_index = tup
    self.assertEqual(X[validation_index], X_validation)
    self.assertEqual(y[validation_index], y_validation)
    self.assertEqual(X[test_index], X_test)
    self.assertEqual(y[test_index], y_test)
    for i, (sample, label) in enumerate(zip(X, y)):
        if i in tup:
            # Validation and test elements were checked above.
            continue
        # assertIn shows the missing element and the container on failure.
        self.assertIn(sample, X_whole_train)
        self.assertIn(label, y_whole_train)