Example #1
    def test_normalize_one_of_n(self):
        # Find the Iris data set relative to this test file.
        irisFile = os.path.dirname(os.path.realpath(__file__))
        irisFile = os.path.abspath(os.path.join(irisFile, "..", "..", "datasets", "iris.csv"))

        norm = Normalize()

        result = norm.load_csv(irisFile)

        self.assertEqual(len(norm.column_map), 5)
        self.assertEqual(len(norm.header), 5)
        self.assertEqual(norm.header[0], "sepal_length")
        self.assertEqual(norm.header[1], "sepal_width")
        self.assertEqual(norm.header[2], "petal_length")
        self.assertEqual(norm.header[3], "petal_width")
        self.assertEqual(norm.header[4], "class")
        self.assertTrue("sepal_length" in norm.column_map)
        self.assertTrue("sepal_width" in norm.column_map)
        self.assertTrue("petal_length" in norm.column_map)
        self.assertTrue("petal_width" in norm.column_map)
        self.assertTrue("class" in norm.column_map)
        self.assertEqual(norm.resolve_column("sepal_length"), 0)
        self.assertEqual(norm.resolve_column("sepal_width"), 1)
        self.assertEqual(norm.resolve_column("petal_length"), 2)
        self.assertEqual(norm.resolve_column("petal_width"), 3)
        self.assertEqual(norm.resolve_column("class"), 4)
        self.assertRaises(AIFHError, norm.resolve_column, 6)
        self.assertRaises(AIFHError, norm.resolve_column, "unknown")

        # Convert the first four columns to numeric and range-normalize them to [-1, 1].
        for i in range(0, 4):
            norm.make_col_numeric(result, i)
            norm.norm_col_range(result, i, -1, 1)

        self.assertAlmostEqual(result[0][0], -0.555, 2)
        self.assertAlmostEqual(result[0][1], 0.249, 2)
        self.assertAlmostEqual(result[0][2], -0.864, 2)
        self.assertAlmostEqual(result[0][3], -0.916, 2)

        classes = norm.build_class_map(result, 4)
        norm.norm_col_one_of_n(result, 4, classes, -1, 1)
        self.assertEqual(len(classes), 3)
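The asserted values follow directly from range normalization. As a quick sanity check (a standalone sketch using the well-known iris extremes for sepal_length, min 4.3 and max 7.9, and the first row's value of 5.1):

def norm_range(x, lo, hi, new_lo=-1.0, new_hi=1.0):
    # Map x from [lo, hi] onto [new_lo, new_hi].
    return (x - lo) / (hi - lo) * (new_hi - new_lo) + new_lo

print(norm_range(5.1, 4.3, 7.9))  # ~ -0.5556, matching the asserted -0.555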
Example #2
import os

import numpy as np

# Normalize comes from the AIFH support library (assumes its directory is on
# sys.path, as shown in the later examples).
from normalize import Normalize

# Find the Iris data set relative to this script.
irisFile = os.path.dirname(os.path.realpath(__file__))
irisFile = os.path.abspath(os.path.join(irisFile, "..", "datasets", "iris.csv"))

# Read the Iris data set.
print('Reading CSV file: ' + irisFile)
norm = Normalize()
iris_work = norm.load_csv(irisFile)

# Keep the original iris species so we can display them during final validation.
ideal_species = [row[4] for row in iris_work]

# Set up the first four fields to "range normalize" between 0 and 1.
for i in range(0, 4):
    norm.make_col_numeric(iris_work, i)
    norm.norm_col_range(iris_work, i, 0, 1)

# Discover all of the classes for column #4, the iris species.
classes = norm.build_class_map(iris_work, 4)
# Invert the class map so indices can be turned back into species names.
inv_classes = {v: k for k, v in classes.items()}

# Normalize the iris species using one-of-n encoding.
# Equilateral encoding would also work; see the example_nm_iris example.
norm.norm_col_one_of_n(iris_work, 4, classes, 0, 1)

# Prepare training data.  Separate into input and ideal.
training = np.array(iris_work)
training_input = training[:, 0:4]
training_ideal = training[:, 4:7]
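For reference, one-of-n encoding gives each species its own column. Purely as an illustration (the exact label strings and column order depend on the data file and on the class map that build_class_map discovered), the encoded targets scaled to the 0..1 range used above would look like:

one_of_n = {
    "Iris-setosa":     [1.0, 0.0, 0.0],
    "Iris-versicolor": [0.0, 1.0, 0.0],
    "Iris-virginica":  [0.0, 0.0, 1.0],
}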
Example #3
    # Tail of a helper whose definition is cut off in this snippet;
    # a least-squares sketch related to it follows this example.
    result += coeff[0]
    return result


# Find the abalone data set relative to this script.
abaloneFile = os.path.dirname(os.path.realpath(__file__))
abaloneFile = os.path.abspath(os.path.join(abaloneFile, "..", "datasets", "abalone.csv"))

# Normalize the abalone file.

norm = Normalize()
abalone_work = norm.load_csv(abaloneFile)

# Make columns 1-8 (everything after the gender column) numeric.
for i in range(1, 9):
    norm.make_col_numeric(abalone_work, i)

# Discover all of the classes in column #0, the gender.
classes = norm.build_class_map(abalone_work, 0)

# Normalize gender with one-of-n encoding.
norm.norm_col_one_of_n(abalone_work, 0, classes, 0, 1)

# Separate into input and ideal.

training = np.array(abalone_work)
training_input = training[:, 0:10]
training_ideal = training[:, 10:11]

coeff = multi_linear_regression(training_input, training_ideal)
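The definition of multi_linear_regression is cut off in this snippet. A minimal sketch of what such a helper might look like, assuming an ordinary least-squares fit with a leading intercept term (which would explain the result += coeff[0] line in the truncated tail above):

import numpy as np

def multi_linear_regression(x, y):
    # Prepend a column of ones so coeff[0] acts as the intercept.
    x1 = np.hstack([np.ones((x.shape[0], 1)), x])
    # Ordinary least squares via numpy.
    coeff, _, _, _ = np.linalg.lstsq(x1, y, rcond=None)
    return coeff.flatten()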
Example #4

import os

import pandas as pd
import statsmodels.api as sm

# Normalize comes from the AIFH support library (assumes its directory is on
# sys.path, as shown in the later examples).
from normalize import Normalize

# Find the Wisconsin breast cancer data set relative to this script.
dataFile = os.path.dirname(os.path.realpath(__file__))
dataFile = os.path.abspath(os.path.join(dataFile, "..", "datasets", "breast-cancer-wisconsin.csv"))

# Normalize the Wisconsin file.

norm = Normalize()
data_file_work = norm.load_csv(dataFile)
norm.delete_unknowns(data_file_work)
norm.col_delete(data_file_work, 0)
# Column 9 is the class; this presumably maps the malignant label (4) to 1
# and everything else to 0.
norm.col_replace(data_file_work, 9, 4, 1, 0)

for i in range(0, 9):
    norm.make_col_numeric(data_file_work, i)

df = pd.DataFrame(data_file_work)
df.columns = ["clump_thickness", "size_uniformity", "shape_uniformity", "marginal_adhesion", "epithelial_size",
              "bare_nucleoli", "bland_chromatin", "normal_nucleoli", "mitoses", "class"]

train_cols = df.columns[0:9]

# Perform the logistic regression.
logit = sm.Logit(df['class'], df[train_cols])

# fit the model
result = logit.fit()

# Display the results.
print(result.summary())
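The fitted result can also score rows: statsmodels' Logit results expose predict(), which returns the estimated probability of class 1 (malignant here) and can be thresholded into hard labels:

# Probability of class 1 for each training row, then a 0/1 decision at 0.5.
pred = result.predict(df[train_cols])
print((pred > 0.5).astype(int)[:5])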
Example #5
def div(args):
    return args[0] / args[1]

# FunctionWrapper and the add/sub/mul helpers (shaped like div above) come
# from the surrounding AIFH genetic-programming example; their definitions
# are cut off in this snippet.
add_wrapper = FunctionWrapper(add, 2, "+")
sub_wrapper = FunctionWrapper(sub, 2, "-")
mul_wrapper = FunctionWrapper(mul, 2, "*")
div_wrapper = FunctionWrapper(div, 2, "/")

# Find the simple-poly data set relative to this script.
polyFile = os.path.dirname(os.path.realpath(__file__))
polyFile = os.path.abspath(os.path.join(polyFile, "..", "datasets", "simple-poly.csv"))

# Read the polynomial data set.
print('Reading CSV file: ' + polyFile)
norm = Normalize()
poly_work = norm.load_csv(polyFile)
norm.make_col_numeric(poly_work, 0)
norm.make_col_numeric(poly_work, 1)

# Prepare training data.  Separate into input and ideal.
training = np.array(poly_work)
training_input = training[:, 0:1]
training_ideal = training[:, 1:2]

# Score a genome: mean squared error of its outputs over the training set.
def score_function(genome):
    # Loop over the training set and calculate the output for each.
    actual_output = []
    for input_data in training_input:
        genome.set_variable_value(["x"], input_data)
        output_data = genome.eval()
        actual_output.append([output_data])
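    # The original snippet ends here. A minimal, assumed completion that
    # finishes the MSE score against training_ideal might read:
    actual = np.array(actual_output)
    return np.mean((actual - training_ideal) ** 2)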
Example #6
import os
import sys

import numpy as np
from scipy.cluster.vq import kmeans2

# Find the AIFH core files.
aifh_dir = os.path.dirname(os.path.abspath(__file__))
aifh_dir = os.path.abspath(aifh_dir + os.sep + ".." + os.sep + "lib" + os.sep + "aifh")
sys.path.append(aifh_dir)

from normalize import Normalize

k = 3

# Find the Iris data set relative to this script.
irisFile = os.path.dirname(os.path.realpath(__file__))
irisFile = os.path.abspath(os.path.join(irisFile, "..", "datasets", "iris.csv"))

# Read the Iris data set.
print('Reading CSV file: ' + irisFile)
norm = Normalize()
iris_data = norm.load_csv(irisFile)

# Prepare the iris data set.
classes = norm.col_extract(iris_data, 4)
norm.col_delete(iris_data, 4)
for i in range(0, 4):
    norm.make_col_numeric(iris_data, i)

# Cluster the Iris data set.
res, idx = kmeans2(np.array(iris_data), k)

for cluster_num in range(0, k):
    print( "Cluster #" + str(cluster_num + 1))
    for i in range(0, len(idx)):
        if idx[i] == cluster_num:
            print(str(iris_data[i]) + "," + classes[i])
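kmeans2 returns the k centroids (res) and, for each row, the index of its assigned centroid (idx). Printing the centroids is a quick way to inspect where the clusters landed:

print("Centroids found by kmeans2:")
print(res)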
Example #7
import os
import sys

# Find the AIFH core files.
aifh_dir = os.path.dirname(os.path.abspath(__file__))
aifh_dir = os.path.abspath(aifh_dir + os.sep + ".." + os.sep + "lib" + os.sep +
                           "aifh")
sys.path.append(aifh_dir)

from normalize import Normalize

# Find the Iris data set relative to this script.
irisFile = os.path.dirname(os.path.realpath(__file__))
irisFile = os.path.abspath(os.path.join(irisFile, "..", "datasets", "iris.csv"))

# Read the Iris data set.
print('Reading CSV file: ' + irisFile)
norm = Normalize()
result = norm.load_csv(irisFile)

# Set up the first four fields to "range normalize" between -1 and 1.
for i in range(0, 4):
    norm.make_col_numeric(result, i)
    norm.norm_col_range(result, i, -1, 1)

# Discover all of the classes for column #4, the iris species.
classes = norm.build_class_map(result, 4)

# Normalize iris species with equilateral encoding
norm.norm_col_equilateral(result, 4, classes, -1, 1)

# Display the resulting data
norm.display_data(result)
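Equilateral encoding represents n classes as n points in n - 1 dimensions that are all equally far apart, so every misclassification is penalized evenly. The AIFH library builds this table internally; purely as an illustrative sketch (not the library's implementation), the classic construction is:

import numpy as np

def equilateral_table(n):
    # n points in n - 1 dimensions, pairwise equidistant and unit length.
    m = np.zeros((n, n - 1))
    m[0, 0] = -1.0
    m[1, 0] = 1.0
    for k in range(2, n):
        # Shrink the existing points so the new one fits...
        f = np.sqrt(k * k - 1.0) / k
        m[:k, :k - 1] *= f
        # ...push them away from the new axis...
        m[:k, k - 1] = -1.0 / k
        # ...and place the new point on that axis.
        m[k, k - 1] = 1.0
    return m

print(equilateral_table(3))  # three unit vectors, pairwise distance sqrt(3)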