def test_normalize_one_of_n(self):
    """Exercise CSV loading, column lookup, range and one-of-n normalization.

    Loads the iris CSV, checks the parsed header and the column map, checks
    that unknown columns raise AIFHError, range-normalizes the four numeric
    columns with the arguments -1 and 1, and finally one-of-n encodes the
    class column and checks that three classes were discovered.
    """
    # Find the Iris data set relative to this test file.  The original code
    # concatenated "../../../datasets/iris.csv" directly onto the directory
    # name (no separator), so the first ".." only cancelled the fused "dir.."
    # component; joining with two ".." resolves to the same file explicitly.
    test_dir = os.path.dirname(os.path.realpath(__file__))
    irisFile = os.path.abspath(
        os.path.join(test_dir, "..", "..", "datasets", "iris.csv"))

    norm = Normalize()
    result = norm.load_csv(irisFile)

    expected_header = [
        "sepal_length",
        "sepal_width",
        "petal_length",
        "petal_width",
        "class",
    ]

    # The header and the column map must both describe exactly five columns.
    self.assertEqual(len(norm.column_map), 5)
    self.assertEqual(len(norm.header), 5)

    # Header entries appear in file order.
    for index, name in enumerate(expected_header):
        self.assertEqual(norm.header[index], name)

    # Every header name is registered in the column map.
    for name in expected_header:
        self.assertIn(name, norm.column_map)

    # Names resolve to their positional index.
    for index, name in enumerate(expected_header):
        self.assertEqual(norm.resolve_column(name), index)

    # An out-of-range index and an unknown name are both rejected.
    self.assertRaises(AIFHError, norm.resolve_column, 6)
    self.assertRaises(AIFHError, norm.resolve_column, "unknown")

    # Convert the four measurement columns to numbers and range-normalize them.
    for i in range(0, 4):
        norm.make_col_numeric(result, i)
        norm.norm_col_range(result, i, -1, 1)

    # Spot-check the first row, compared to two decimal places.
    self.assertAlmostEqual(result[0][0], -0.555, 2)
    self.assertAlmostEqual(result[0][1], 0.249, 2)
    self.assertAlmostEqual(result[0][2], -0.864, 2)
    self.assertAlmostEqual(result[0][3], -0.916, 2)

    # Build the class map for column 4 and one-of-n encode that column.
    classes = norm.build_class_map(result, 4)
    norm.norm_col_one_of_n(result, 4, classes, -1, 1)
    self.assertEqual(len(classes), 3)
# Extract the original iris species so we can display during the final validation. ideal_species = [row[4] for row in iris_work] # Setup the first four fields to "range normalize" between -1 and 1. for i in range(0, 4): norm.make_col_numeric(iris_work, i) norm.norm_col_range(iris_work, i, 0, 1) # Discover all of the classes for column #4, the iris species. classes = norm.build_class_map(iris_work, 4) inv_classes = {v: k for k, v in classes.items()} # Normalize iris species using one-of-n. # We could have used equilateral as well. For an example of equilateral, see the example_nm_iris example. norm.norm_col_one_of_n(iris_work, 4, classes, 0, 1) # Prepare training data. Separate into input and ideal. training = np.array(iris_work) training_input = training[:, 0:4] training_ideal = training[:, 4:7] # Create an RBF network. There are four inputs and two outputs. # There are also five RBF functions used internally. # You can experiment with different numbers of internal RBF functions. # However, the input and output must match the data set. network = RbfNetwork(4, 4, 3) network.reset() def score_funct(x):
abaloneFile = os.path.abspath(abaloneFile + "../../datasets/abalone.csv") # Normalize abalone file. norm = Normalize() abalone_work = norm.load_csv(abaloneFile) # Make all columns beyond col #1 numeric. for i in range(1, 9): norm.make_col_numeric(abalone_work, i) # Discover all of the classes for column #1, the gender. classes = norm.build_class_map(abalone_work, 0) # Normalize gender one-of-n encoding. norm.norm_col_one_of_n(abalone_work, 0, classes, 0, 1) # Separate into input and ideal. training = np.array(abalone_work) training_input = training[:, 0:10] training_ideal = training[:, 10:11] coeff = multi_linear_regression(training_input, training_ideal) print("Solution coefficients: " + str(coeff)) # Evaluate. for i in range(0, len(training_input)): row = training_input[i] y = calc_linear_regression(coeff, row)