def test_normalize_equilateral(self):
    # Find the Iris data set.
    irisFile = os.path.dirname(os.path.realpath(__file__))
    irisFile = os.path.abspath(os.path.join(irisFile, "..", "..", "..", "datasets", "iris.csv"))

    norm = Normalize()
    result = norm.load_csv(irisFile)
    classes = norm.build_class_map(result, 4)
    norm.norm_col_equilateral(result, 4, classes, 0, 1)

    # Three classes encode to two equilateral columns, so each row grows from 5 to 6 fields.
    self.assertEqual(len(result[0]), 6)
    self.assertAlmostEqual(result[0][4], 0.06698, 3)
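# The 0.06698 expectation above comes from equilateral encoding: n classes map to
# the vertices of a regular simplex in n-1 dimensions, so each class needs one
# fewer output column than one-of-n. Below is a minimal sketch of that
# construction; the function name is an assumption for illustration, not the
# Normalize library API.
import math

def equilateral_matrix(n_classes, low, high):
    # Start with two classes encoded as -1 and +1 on a single axis.
    result = [[0.0] * (n_classes - 1) for _ in range(n_classes)]
    result[0][0] = -1.0
    result[1][0] = 1.0
    for k in range(2, n_classes):
        # Scale the existing vertices inward, then add a new vertex on a new axis.
        r = float(k)
        f = math.sqrt(r * r - 1.0) / r
        for i in range(k):
            for j in range(k - 1):
                result[i][j] *= f
        for i in range(k):
            result[i][k - 1] = -1.0 / r
        result[k][k - 1] = 1.0
    # Rescale each coordinate from [-1, 1] to [low, high].
    return [[(v + 1.0) / 2.0 * (high - low) + low for v in row] for row in result]

# For three iris species in [0, 1], class 0 encodes to about (0.067, 0.25),
# matching the assertAlmostEqual in the test above.
print(equilateral_matrix(3, 0, 1)[0][0])  # ~0.06699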
def test_normalize_one_of_n(self):
    # Find the Iris data set.
    irisFile = os.path.dirname(os.path.realpath(__file__))
    irisFile = os.path.abspath(os.path.join(irisFile, "..", "..", "..", "datasets", "iris.csv"))

    norm = Normalize()
    result = norm.load_csv(irisFile)

    self.assertEqual(len(norm.column_map), 5)
    self.assertEqual(len(norm.header), 5)
    self.assertEqual(norm.header[0], "sepal_length")
    self.assertEqual(norm.header[1], "sepal_width")
    self.assertEqual(norm.header[2], "petal_length")
    self.assertEqual(norm.header[3], "petal_width")
    self.assertEqual(norm.header[4], "class")
    self.assertTrue("sepal_length" in norm.column_map)
    self.assertTrue("sepal_width" in norm.column_map)
    self.assertTrue("petal_length" in norm.column_map)
    self.assertTrue("petal_width" in norm.column_map)
    self.assertTrue("class" in norm.column_map)
    self.assertEqual(norm.resolve_column("sepal_length"), 0)
    self.assertEqual(norm.resolve_column("sepal_width"), 1)
    self.assertEqual(norm.resolve_column("petal_length"), 2)
    self.assertEqual(norm.resolve_column("petal_width"), 3)
    self.assertEqual(norm.resolve_column("class"), 4)
    self.assertRaises(AIFHError, norm.resolve_column, 6)
    self.assertRaises(AIFHError, norm.resolve_column, "unknown")

    for i in range(0, 4):
        norm.make_col_numeric(result, i)
        norm.norm_col_range(result, i, -1, 1)

    self.assertAlmostEqual(result[0][0], -0.555, 2)
    self.assertAlmostEqual(result[0][1], 0.249, 2)
    self.assertAlmostEqual(result[0][2], -0.864, 2)
    self.assertAlmostEqual(result[0][3], -0.916, 2)

    classes = norm.build_class_map(result, 4)
    norm.norm_col_one_of_n(result, 4, classes, -1, 1)
    self.assertEqual(len(classes), 3)
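# One-of-n encoding is simpler than equilateral: each class becomes a vector
# with `high` at the class index and `low` everywhere else. A minimal sketch,
# assuming a class map of the kind build_class_map returns
# ({"Iris-setosa": 0, ...}); this helper is illustrative, not the library API.
def one_of_n(class_index, class_count, low, high):
    # e.g. one_of_n(1, 3, -1, 1) -> [-1, 1, -1]
    return [high if i == class_index else low for i in range(class_count)]

print(one_of_n(0, 3, -1, 1))  # [1, -1, -1]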
"aifh") sys.path.append(aifh_dir) from normalize import Normalize from rbf_network import RbfNetwork from error import ErrorCalculation from train import TrainAnneal # find the Iris data set irisFile = os.path.dirname(os.path.realpath(__file__)) irisFile = os.path.abspath(irisFile + "../../datasets/iris.csv") # Read the Iris data set. print('Reading CSV file: ' + irisFile) norm = Normalize() iris_work = norm.load_csv(irisFile) # Extract the original iris species so we can display during the final validation. ideal_species = [row[4] for row in iris_work] # Setup the first four fields to "range normalize" between -1 and 1. for i in range(0, 4): norm.make_col_numeric(iris_work, i) norm.norm_col_range(iris_work, i, 0, 1) # Discover all of the classes for column #4, the iris species. classes = norm.build_class_map(iris_work, 4) inv_classes = {v: k for k, v in classes.items()} # Normalize iris species using one-of-n. # We could have used equilateral as well. For an example of equilateral, see the example_nm_iris example.
sys.path.append(aifh_dir)

from normalize import Normalize
from rbf_network import RbfNetwork
from error import ErrorCalculation
from train import TrainAnneal

import numpy as np

# Find the Iris data set.
irisFile = os.path.dirname(os.path.realpath(__file__))
irisFile = os.path.abspath(os.path.join(irisFile, "..", "..", "datasets", "iris.csv"))

# Read the Iris data set.
print('Reading CSV file: ' + irisFile)
norm = Normalize()
iris_work = norm.load_csv(irisFile)

# Extract the original iris species so we can display them during the final validation.
ideal_species = [row[4] for row in iris_work]

# Set up the first four fields to range normalize between 0 and 1.
for i in range(0, 4):
    norm.make_col_numeric(iris_work, i)
    norm.norm_col_range(iris_work, i, 0, 1)

# Discover all of the classes in column #4, the iris species.
classes = norm.build_class_map(iris_work, 4)
inv_classes = {v: k for k, v in classes.items()}

# Normalize the iris species using one-of-n encoding.
# Equilateral encoding would also work; see the example_nm_iris example for that approach.
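# norm_col_range performs simple min-max scaling. A sketch of the underlying
# formula follows; the function below is illustrative, not the Normalize
# implementation.
def range_normalize(x, data_low, data_high, norm_low, norm_high):
    return (x - data_low) / (data_high - data_low) * (norm_high - norm_low) + norm_low

# A sepal length of 5.1 with observed bounds [4.3, 7.9] maps into [0, 1] as:
print(range_normalize(5.1, 4.3, 7.9, 0, 1))  # ~0.222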
# NOTE: this snippet begins inside a helper; the header below, including the
# name "linear_model", is a reconstruction. It evaluates a linear model whose
# intercept is coeff[0] and whose slope coefficients are coeff[1:].
def linear_model(coeff, x):
    result = 0
    for i in range(1, len(coeff)):
        result += x[i - 1] * coeff[i]
    result += coeff[0]
    return result

# Find the abalone data set.
abaloneFile = os.path.dirname(os.path.realpath(__file__))
abaloneFile = os.path.abspath(os.path.join(abaloneFile, "..", "..", "datasets", "abalone.csv"))

# Normalize the abalone file.
norm = Normalize()
abalone_work = norm.load_csv(abaloneFile)

# Make all columns beyond column #0 numeric.
for i in range(1, 9):
    norm.make_col_numeric(abalone_work, i)

# Discover all of the classes in column #0, the gender.
classes = norm.build_class_map(abalone_work, 0)

# Normalize gender with one-of-n encoding.
norm.norm_col_one_of_n(abalone_work, 0, classes, 0, 1)

# Separate into input and ideal.
training = np.array(abalone_work)
training_input = training[:, 0:10]
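# A hedged sketch (not from the original) of how the helper above can be
# scored: take column 10, the ring count, as the ideal output and compute the
# mean squared error over the training rows. training_ideal and the score
# helper are assumptions for illustration.
training_ideal = training[:, 10:11]

def score(coeff):
    predictions = [linear_model(coeff, row) for row in training_input]
    errors = [(p - ideal[0]) ** 2 for p, ideal in zip(predictions, training_ideal)]
    return sum(errors) / len(errors)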
from normalize import Normalize
from rbf_network import RbfNetwork
from error import ErrorCalculation
from train import TrainAnneal

import numpy as np

# Find the Iris data set.
# irisFile = os.path.dirname(os.path.realpath(__file__))
# irisFile = os.path.abspath(irisFile + "../../datasets/iris.csv")
iris_filename = "iris.csv"

# Read the Iris data set.
print('Reading CSV file: ' + iris_filename)
norm = Normalize()
iris_work = norm.load_csv(iris_filename)

# Extract the original iris species so we can display them during the final validation.
ideal_species = [row[4] for row in iris_work]

# Set up the first four fields to range normalize between 0 and 1.
for i in range(0, 4):
    norm.make_col_numeric(iris_work, i)
    norm.norm_col_range(iris_work, i, 0, 1)

# Discover all of the classes in column #4, the iris species.
classes = norm.build_class_map(iris_work, 4)
inv_classes = {v: k for k, v in classes.items()}

# Normalize the iris species using one-of-n encoding.
# Equilateral encoding would also work; see the example_nm_iris example for that approach.
# Find the AIFH core files.
aifh_dir = os.path.dirname(os.path.abspath(__file__))
aifh_dir = os.path.abspath(aifh_dir + os.sep + ".." + os.sep + "lib" + os.sep + "aifh")
sys.path.append(aifh_dir)

from normalize import Normalize

# Find the Wisconsin breast cancer data set.
dataFile = os.path.dirname(os.path.realpath(__file__))
dataFile = os.path.abspath(os.path.join(dataFile, "..", "..", "datasets", "breast-cancer-wisconsin.csv"))

# Normalize the Wisconsin file.
norm = Normalize()
data_file_work = norm.load_csv(dataFile)
norm.delete_unknowns(data_file_work)
norm.col_delete(data_file_work, 0)
norm.col_replace(data_file_work, 9, 4, 1, 0)
for i in range(0, 9):
    norm.make_col_numeric(data_file_work, i)

df = pd.DataFrame(data_file_work)
df.columns = ["clump_thickness", "size_uniformity", "shape_uniformity", "marginal_adhesion",
              "epithelial_size", "bare_nucleoli", "bland_chromatin", "normal_nucleoli",
              "mitoses", "class"]
train_cols = df.columns[0:9]

# Perform the logistic regression.
logit = sm.Logit(df['class'], df[train_cols])
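# Fitting and inspecting the model uses the standard statsmodels API. A short
# usage sketch; the 0.5 decision threshold is a conventional choice, not from
# the original.
result = logit.fit()
print(result.summary())

# Threshold the predicted probabilities into benign (0) / malignant (1) calls.
predictions = result.predict(df[train_cols])
accuracy = ((predictions > 0.5) == df['class']).mean()
print("Training accuracy: " + str(accuracy))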
def div(args):
    return args[0] / args[1]

add_wrapper = FunctionWrapper(add, 2, "+")
sub_wrapper = FunctionWrapper(sub, 2, "-")
mul_wrapper = FunctionWrapper(mul, 2, "*")
div_wrapper = FunctionWrapper(div, 2, "/")

# Find the simple polynomial data set.
polyFile = os.path.dirname(os.path.realpath(__file__))
polyFile = os.path.abspath(os.path.join(polyFile, "..", "..", "datasets", "simple-poly.csv"))

# Read the polynomial data set.
print('Reading CSV file: ' + polyFile)
norm = Normalize()
poly_work = norm.load_csv(polyFile)
norm.make_col_numeric(poly_work, 0)
norm.make_col_numeric(poly_work, 1)

# Prepare the training data. Separate into input and ideal.
training = np.array(poly_work)
training_input = training[:, 0:1]
training_ideal = training[:, 1:2]


# Calculate the error with MSE.
def score_function(genome):
    # Loop over the training set and calculate the output for each row.
    actual_output = []
    for input_data in training_input:
        genome.set_variable_value(["x"], input_data)
# Find the AIFH core files.
aifh_dir = os.path.dirname(os.path.abspath(__file__))
aifh_dir = os.path.abspath(aifh_dir + os.sep + ".." + os.sep + "lib" + os.sep + "aifh")
sys.path.append(aifh_dir)

from normalize import Normalize

# Find the Iris data set.
irisFile = os.path.dirname(os.path.realpath(__file__))
irisFile = os.path.abspath(os.path.join(irisFile, "..", "..", "datasets", "iris.csv"))

# Read the Iris data set.
print('Reading CSV file: ' + irisFile)
norm = Normalize()
result = norm.load_csv(irisFile)

# Set up the first four fields to range normalize between -1 and 1.
for i in range(0, 4):
    norm.make_col_numeric(result, i)
    norm.norm_col_range(result, i, -1, 1)

# Discover all of the classes in column #4, the iris species.
classes = norm.build_class_map(result, 4)

# Normalize the iris species with equilateral encoding.
norm.norm_col_equilateral(result, 4, classes, -1, 1)

# Display the resulting data.
norm.display_data(result)
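# To turn an equilateral-encoded output (for example, from a trained network)
# back into a class index, find the simplex vertex nearest the output vector.
# A minimal sketch; `encodings` stands in for the per-class vectors that
# norm_col_equilateral writes, and the helper name is an assumption.
def equilateral_decode(output, encodings):
    def dist2(a, b):
        return sum((x - y) ** 2 for x, y in zip(a, b))
    return min(range(len(encodings)), key=lambda c: dist2(output, encodings[c]))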
aifh_dir = os.path.abspath(aifh_dir + os.sep + ".." + os.sep + "lib" + os.sep + "aifh")
sys.path.append(aifh_dir)

from normalize import Normalize

k = 3

# Find the Iris data set.
irisFile = os.path.dirname(os.path.realpath(__file__))
irisFile = os.path.abspath(os.path.join(irisFile, "..", "..", "datasets", "iris.csv"))

# Read the Iris data set.
print('Reading CSV file: ' + irisFile)
norm = Normalize()
iris_data = norm.load_csv(irisFile)

# Prepare the iris data set: extract the species labels, then keep the four numeric features.
classes = norm.col_extract(iris_data, 4)
norm.col_delete(iris_data, 4)
for i in range(0, 4):
    norm.make_col_numeric(iris_data, i)

# Cluster the Iris data set with k-means.
res, idx = kmeans2(np.array(iris_data), k)

for cluster_num in range(0, k):
    print("Cluster #" + str(cluster_num + 1))
    for i in range(0, len(idx)):
        if idx[i] == cluster_num:
            print(str(iris_data[i]) + "," + classes[i])
def div(args):
    # Protected division: return 1 when the denominator is zero. (The guard
    # condition is reconstructed; the snippet shows only the two returns.)
    if args[1] == 0:
        return 1
    return args[0] / args[1]

add_wrapper = FunctionWrapper(add, 2, "+")
sub_wrapper = FunctionWrapper(sub, 2, "-")
mul_wrapper = FunctionWrapper(mul, 2, "*")
div_wrapper = FunctionWrapper(div, 2, "/")

# Find the simple polynomial data set.
polyFile = os.path.dirname(os.path.realpath(__file__))
polyFile = os.path.abspath(os.path.join(polyFile, "..", "..", "datasets", "simple-poly.csv"))

# Read the polynomial data set.
print('Reading CSV file: ' + polyFile)
norm = Normalize()
poly_work = norm.load_csv(polyFile)
norm.make_col_numeric(poly_work, 0)
norm.make_col_numeric(poly_work, 1)

# Prepare the training data. Separate into input and ideal.
training = np.array(poly_work)
training_input = training[:, 0:1]
training_ideal = training[:, 1:2]


# Calculate the error with MSE.
def score_function(genome):
    # Loop over the training set and calculate the output for each row.
    actual_output = []
    for input_data in training_input:
        genome.set_variable_value(["x"], input_data)
        output_data = genome.eval()
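        # A hedged completion (not from the original): accumulate each
        # evaluated output, then score the genome by mean squared error
        # against training_ideal once the loop finishes.
        actual_output.append(output_data)
    actual = np.array(actual_output).reshape(-1, 1)
    return float(np.mean((actual - training_ideal) ** 2))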
aifh_dir = os.path.dirname(os.path.abspath(__file__))
aifh_dir = os.path.abspath(aifh_dir + os.sep + ".." + os.sep + "lib" + os.sep + "aifh")
sys.path.append(aifh_dir)

from normalize import Normalize

k = 3

# Find the Iris data set.
irisFile = os.path.dirname(os.path.realpath(__file__))
irisFile = os.path.abspath(os.path.join(irisFile, "..", "..", "datasets", "iris.csv"))

# Read the Iris data set.
print('Reading CSV file: ' + irisFile)
norm = Normalize()
iris_data = norm.load_csv(irisFile)

# Prepare the iris data set: extract the species labels, then keep the four numeric features.
classes = norm.col_extract(iris_data, 4)
norm.col_delete(iris_data, 4)
for i in range(0, 4):
    norm.make_col_numeric(iris_data, i)

# Cluster the Iris data set with k-means.
res, idx = kmeans2(np.array(iris_data), k)

for cluster_num in range(0, k):
    print("Cluster #" + str(cluster_num + 1))
    for i in range(0, len(idx)):
        if idx[i] == cluster_num:
            print(str(iris_data[i]) + "," + classes[i])
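# A quick way to summarize how well the clusters line up with the species is
# to count labels per cluster. A small sketch using only the variables above:
from collections import Counter

for cluster_num in range(0, k):
    members = Counter(classes[i] for i in range(len(idx)) if idx[i] == cluster_num)
    print("Cluster #" + str(cluster_num + 1) + " composition: " + str(dict(members)))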
aifh_dir = os.path.dirname(os.path.abspath(__file__))
aifh_dir = os.path.abspath(aifh_dir + os.sep + ".." + os.sep + "lib" + os.sep + "aifh")
sys.path.append(aifh_dir)

from normalize import Normalize

# Find the Wisconsin breast cancer data set.
dataFile = os.path.dirname(os.path.realpath(__file__))
dataFile = os.path.abspath(os.path.join(dataFile, "..", "..", "datasets", "breast-cancer-wisconsin.csv"))

# Normalize the Wisconsin file.
norm = Normalize()
data_file_work = norm.load_csv(dataFile)
norm.delete_unknowns(data_file_work)
norm.col_delete(data_file_work, 0)
norm.col_replace(data_file_work, 9, 4, 1, 0)
for i in range(0, 9):
    norm.make_col_numeric(data_file_work, i)

df = pd.DataFrame(data_file_work)
df.columns = ["clump_thickness", "size_uniformity", "shape_uniformity", "marginal_adhesion",
              "epithelial_size", "bare_nucleoli", "bland_chromatin", "normal_nucleoli",
              "mitoses", "class"]
train_cols = df.columns[0:9]