def testDeleteNonexistentAttribute(self):
    # Asking the enumerator to delete an attribute named "N" must raise,
    # and the error text must say that the header doesn't exist.
    csv_path = os.path.join(THIS_DIR, "test/DataSets/Tests/StringData.csv")
    enumerator = StringEnumerator(csv_path, "phenotype")

    with self.assertRaises(Exception) as raised:
        enumerator.delete_attribute("N")

    error_text = str(raised.exception)
    self.assertTrue("Header Doesn't Exist" in error_text)
def testDeleteInstancesWithMissing2(self):
    # Removes instances through delete_all_instances_without_header_data for
    # every header, enumerates the string columns and the class, converts,
    # and then verifies headers, class label, both data arrays and the map.
    csv_path = os.path.join(THIS_DIR, "test/DataSets/Tests/StringData.csv")
    enumerator = StringEnumerator(csv_path, "phenotype")
    enumerator.change_header_name("N1", "gender")

    for header in ("gender", "N2", "N3"):
        enumerator.delete_all_instances_without_header_data(header)

    for header in ("gender", "N3"):
        enumerator.add_attribute_converter_random(header)
    enumerator.add_class_converter_random()
    enumerator.convert_all_attributes()

    expected_headers = np.array(["gender", "N2", "N3"])
    expected_map = {
        "phenotype": {"china": "0"},
        "gender": {"male": "0", "female": "1"},
        "N3": {"young": "0", "old": "1"},
    }
    expected_features = np.array([
        ["0", "1.2", "0"],
        ["1", "-0.4", "1"],
    ])
    expected_phenotypes = np.array(["0", "0"])

    self.assertTrue(np.array_equal(expected_headers, enumerator.dataHeaders))
    self.assertTrue(np.array_equal("phenotype", enumerator.classLabel))
    self.assertTrue(np.array_equal(expected_features, enumerator.dataFeatures))
    self.assertTrue(
        np.array_equal(expected_phenotypes, enumerator.dataPhenotypes))
    self.assertTrue(enumerator.map == expected_map)
def testChangeClassAndHeaderNames2(self):
    # Interleaves header renames with converter registration and checks that
    # the map follows the renames: the expected map below holds exactly one
    # entry per enumerated column, keyed by the final header name.
    csv_path = os.path.join(THIS_DIR, "test/DataSets/Tests/StringData.csv")
    enumerator = StringEnumerator(csv_path, "phenotype")

    enumerator.add_class_converter_random()

    enumerator.change_header_name("N1", "gender")
    enumerator.add_attribute_converter_random("gender")
    enumerator.change_header_name("gender", "Gender")

    # Repeated requests for "Gender", plus one for the stale name "gender";
    # the expected map shows none of them may add or alter an entry.
    enumerator.add_attribute_converter_random("Gender")
    enumerator.add_attribute_converter_random("Gender")
    enumerator.add_attribute_converter_random("gender")

    enumerator.add_attribute_converter_random("N3")
    enumerator.change_header_name("N3", "Age")

    expected_headers = np.array(["Gender", "N2", "Age"])
    expected_map = {
        "phenotype": {"china": "0", "japan": "1", "russia": "2"},
        "Gender": {"male": "0", "female": "1"},
        "Age": {"young": "0", "old": "1"},
    }

    self.assertTrue(np.array_equal(expected_headers, enumerator.dataHeaders))
    self.assertTrue(np.array_equal("phenotype", enumerator.classLabel))
    self.assertTrue(enumerator.map == expected_map)
def testInitHeaders(self):
    # Construction alone must expose the four feature headers N1..N4
    # through dataHeaders.
    csv_path = os.path.join(
        THIS_DIR, "test/DataSets/Tests/MissingFeatureData.csv")
    enumerator = StringEnumerator(csv_path, "phenotype")

    expected_headers = np.array(["N1", "N2", "N3", "N4"])
    headers_match = np.array_equal(expected_headers, enumerator.dataHeaders)
    self.assertTrue(headers_match)
def testget_paramsFail(self):
    # get_params must raise while features/class have not all been
    # enumerated, and the message must explain the numeric requirement.
    csv_path = os.path.join(THIS_DIR, "test/DataSets/Tests/StringData.csv")
    enumerator = StringEnumerator(csv_path, "phenotype")

    with self.assertRaises(Exception) as raised:
        enumerator.get_params()

    error_text = str(raised.exception)
    self.assertTrue(
        "Features and Phenotypes must be fully numeric" in error_text)
def testNumericCheck(self):
    # Walks check_is_full_numeric through three stages on the string data
    # set (nothing converted, only N1 converted, everything converted) and
    # then checks it on the MissingFeatureData set with no conversion.
    string_csv = os.path.join(THIS_DIR, "test/DataSets/Tests/StringData.csv")
    enumerator = StringEnumerator(string_csv, "phenotype")
    self.assertFalse(enumerator.check_is_full_numeric())

    # Only N1 enumerated: the check is still expected to fail.
    enumerator.add_attribute_converter_random("N1")
    enumerator.convert_all_attributes()
    self.assertFalse(enumerator.check_is_full_numeric())

    # N3 and the class enumerated as well: the check is expected to pass.
    enumerator.add_attribute_converter_random("N3")
    enumerator.add_class_converter_random()
    enumerator.convert_all_attributes()
    self.assertTrue(enumerator.check_is_full_numeric())

    missing_csv = os.path.join(
        THIS_DIR, "test/DataSets/Tests/MissingFeatureData.csv")
    missing_enumerator = StringEnumerator(missing_csv, "phenotype")
    self.assertTrue(missing_enumerator.check_is_full_numeric())
def testchange_header_nameInvalid2(self):
    # Renaming the header "N" (the test expects it not to exist) must
    # raise with a message naming the missing current header.
    csv_path = os.path.join(THIS_DIR, "test/DataSets/Tests/StringData.csv")
    enumerator = StringEnumerator(csv_path, "phenotype")

    with self.assertRaises(Exception) as raised:
        enumerator.change_header_name("N", "N5")

    error_text = str(raised.exception)
    self.assertTrue("Current Header Doesn't Exist" in error_text)
def test20BitMP5000Iterations(self):
    # Trains XCS on the Multiplexer20Modified data set for 5000 iterations
    # and judges the final training accuracy with approxEqualOrBetter
    # against 0.6634 (0.2 is its first argument, presumably a tolerance).
    csv_path = os.path.join(
        THIS_DIR, "test/DataSets/Real/Multiplexer20Modified.csv")
    enumerator = StringEnumerator(csv_path, "Class")
    _headers, _class_label, features, phenotypes = enumerator.get_params()

    model = XCS(learning_iterations=5000, N=2000, nu=10)
    model.fit(features, phenotypes)

    expected_accuracy = 0.6634
    accuracy = model.get_final_training_accuracy()
    good_enough = self.approxEqualOrBetter(
        0.2, accuracy, expected_accuracy, True)
    self.assertTrue(good_enough)
def testContValues5000Iterations(self):
    # Trains a default-sized XCS for 5000 iterations on the
    # ContinuousAndNonBinaryDiscreteAttributes data set and judges the final
    # training accuracy with approxEqualOrBetter against 0.64.
    csv_path = os.path.join(
        THIS_DIR,
        "test/DataSets/Real/ContinuousAndNonBinaryDiscreteAttributes.csv")
    enumerator = StringEnumerator(csv_path, "Class")
    _headers, _class_label, features, phenotypes = enumerator.get_params()

    model = XCS(learning_iterations=5000)
    model.fit(features, phenotypes)

    expected_accuracy = 0.64
    accuracy = model.get_final_training_accuracy()
    good_enough = self.approxEqualOrBetter(
        0.2, accuracy, expected_accuracy, True)
    self.assertTrue(good_enough)
def testchange_header_nameInvalid(self):
    # Renaming header N1 to N2 (a name the test expects to be taken
    # already) must raise with the "already existing" message.
    csv_path = os.path.join(THIS_DIR, "test/DataSets/Tests/StringData.csv")
    enumerator = StringEnumerator(csv_path, "phenotype")

    with self.assertRaises(Exception) as raised:
        enumerator.change_header_name("N1", "N2")

    error_text = str(raised.exception)
    expected_fragment = (
        "New Class Name Cannot Be An Already Existing Data Header or "
        "Phenotype Name")
    self.assertTrue(expected_fragment in error_text)
def testInitFeaturesAndClassRemoval(self):
    # Loads a data set with missing feature and phenotype values and checks
    # the feature and phenotype arrays produced by construction: three
    # instances remain, with missing features shown as "NA".
    csv_path = os.path.join(
        THIS_DIR, "test/DataSets/Tests/MissingFeatureAndPhenotypeData.csv")
    enumerator = StringEnumerator(csv_path, "phenotype")

    expected_features = np.array([
        ["1.0", "NA", "1.0", "4.0"],
        ["NA", "1.0", "NA", "1.0"],
        ["6.0", "NA", "1.0", "1.0"],
    ])
    expected_classes = np.array(["1.0", "0.0", "1.0"])

    self.assertTrue(np.array_equal(expected_features, enumerator.dataFeatures))
    self.assertTrue(np.array_equal(expected_classes, enumerator.dataPhenotypes))
def testInitMissingData(self):
    # Construction must present missing feature values as the string "NA"
    # while keeping all five instances.
    csv_path = os.path.join(
        THIS_DIR, "test/DataSets/Tests/MissingFeatureData.csv")
    enumerator = StringEnumerator(csv_path, "phenotype")

    expected_features = np.array([
        ["1.0", "NA", "1.0", "4.0"],
        ["2.0", "0.0", "1.0", "NA"],
        ["4.0", "NA", "1.0", "2.0"],
        ["NA", "1.0", "NA", "1.0"],
        ["6.0", "NA", "1.0", "1.0"],
    ])
    features_match = np.array_equal(expected_features, enumerator.dataFeatures)
    self.assertTrue(features_match)
def testChangeClassAndHeaderNames(self):
    # Renames the class and chains several header renames, reusing names
    # freed by earlier renames (including the old class name "phenotype"),
    # then checks the resulting headers and class label.
    csv_path = os.path.join(THIS_DIR, "test/DataSets/Tests/StringData.csv")
    enumerator = StringEnumerator(csv_path, "phenotype")

    enumerator.change_class_name("country")

    # Order matters: each rename relies on the name released just before it.
    rename_steps = (
        ("N1", "gender"),
        ("N2", "N1"),
        ("N1", "floats"),
        ("N3", "phenotype"),
        ("phenotype", "age"),
    )
    for current_name, new_name in rename_steps:
        enumerator.change_header_name(current_name, new_name)

    expected_headers = np.array(["gender", "floats", "age"])
    self.assertTrue(np.array_equal(expected_headers, enumerator.dataHeaders))
    self.assertTrue(np.array_equal("country", enumerator.classLabel))
def testdelete_attribute(self):
    # Deletes an attribute that already has a converter and checks that it
    # disappears from both the headers and the map, leaving only N3's entry.
    csv_path = os.path.join(THIS_DIR, "test/DataSets/Tests/StringData.csv")
    enumerator = StringEnumerator(csv_path, "phenotype")

    enumerator.change_header_name("N1", "gender")
    for header in ("gender", "N3"):
        enumerator.add_attribute_converter_random(header)
    enumerator.delete_attribute("gender")

    expected_headers = np.array(["N2", "N3"])
    expected_map = {"N3": {"young": "0", "old": "1"}}

    self.assertTrue(np.array_equal(expected_headers, enumerator.dataHeaders))
    self.assertTrue(np.array_equal("phenotype", enumerator.classLabel))
    self.assertTrue(enumerator.map == expected_map)
def testPredictInvVar(self):
    # Smoke test: trains XCS with the inverse-variance option enabled on the
    # Multiplexer6Modified data set and runs predict on the first saved raw
    # training instance. The test only fails if training or prediction
    # raises; the prediction is printed for manual inspection.
    # NOTE(review): nothing is asserted about the prediction - consider
    # adding an assertion once the expected output is pinned down.
    dataPath = os.path.join(THIS_DIR,
                            "test/DataSets/Real/Multiplexer6Modified.csv")
    converter = StringEnumerator(dataPath, "Class")
    headers, classLabel, dataFeatures, dataPhenotypes = converter.get_params()

    # "use_inverse_varinance" is the keyword exactly as this test has always
    # passed it to XCS; it must not be respelled here.
    clf = XCS(learning_iterations=1000,
              N=500,
              nu=10,
              use_inverse_varinance=True,
              p_explore=0.5)
    clf.fit(dataFeatures, dataPhenotypes)

    # The leftover debug marker print ("kkkk...") was removed; it carried no
    # information and only polluted the test output.
    print(clf.predict(clf.env.formatData.savedRawTrainingData[0]))
def testInverseVariance(self):
    # Smoke test: trains XCS with mixing_method="inv-var-only-mixing" on the
    # Multiplexer11 data set for 5000 iterations and prints the final
    # training accuracy. The test only fails if training raises.
    # NOTE(review): no assertion is made on the score. The original carried
    # an unused expected value of 0.894; consider asserting against it via
    # the file's approxEqualOrBetter helper once that threshold is confirmed.
    dataPath = os.path.join(THIS_DIR, "test/DataSets/Real/Multiplexer11.csv")
    converter = StringEnumerator(dataPath, "class")
    headers, classLabel, dataFeatures, dataPhenotypes = converter.get_params()

    clf = XCS(learning_iterations=5000,
              N=1000,
              mixing_method="inv-var-only-mixing")
    clf.fit(dataFeatures, dataPhenotypes)

    score = clf.get_final_training_accuracy()
    # The label now names the data set and iteration count actually used;
    # it previously read "6 Bit 1000 Iter".
    print("#####################################\n"
          "Multiplexer11 5000 Iter: " + str(score))
def testContValuesAndMissingTesting5000Iterations(self):
    # Shuffles the ContinuousAndNonBinaryDiscreteAttributesMissing data set,
    # runs 3-fold cross-validation with a 5000-iteration XCS, and judges the
    # mean score with approxEqualOrBetter against 0.5.
    csv_path = os.path.join(
        THIS_DIR,
        "test/DataSets/Real/"
        "ContinuousAndNonBinaryDiscreteAttributesMissing.csv")
    enumerator = StringEnumerator(csv_path, "Class")
    _headers, _class_label, features, phenotypes = enumerator.get_params()

    # Append the phenotypes as a last column so rows are shuffled together
    # with their labels, then split the two apart again.
    label_column = features.shape[1]
    combined = np.insert(features, label_column, phenotypes, 1)
    np.random.shuffle(combined)
    features = np.delete(combined, -1, axis=1)
    phenotypes = combined[:, -1]

    model = XCS(learning_iterations=5000)
    fold_scores = cross_val_score(model, features, phenotypes, cv=3)
    mean_score = np.mean(fold_scores)

    expected_score = 0.5
    good_enough = self.approxEqualOrBetter(
        0.2, mean_score, expected_score, True)
    self.assertTrue(good_enough)
def test6BitMPTesting1000Iterations(self):
    # Shuffles the Multiplexer6Modified data set, runs 3-fold
    # cross-validation with a 1000-iteration XCS, and judges the mean score
    # with approxEqualOrBetter against 0.9.
    csv_path = os.path.join(
        THIS_DIR, "test/DataSets/Real/Multiplexer6Modified.csv")
    enumerator = StringEnumerator(csv_path, "Class")
    _headers, _class_label, features, phenotypes = enumerator.get_params()

    # Append the phenotypes as a last column so rows are shuffled together
    # with their labels, then split the two apart again.
    label_column = features.shape[1]
    combined = np.insert(features, label_column, phenotypes, 1)
    np.random.shuffle(combined)
    features = np.delete(combined, -1, axis=1)
    phenotypes = combined[:, -1]

    model = XCS(learning_iterations=1000, N=500, nu=10)
    fold_scores = cross_val_score(model, features, phenotypes, cv=3)
    mean_score = np.mean(fold_scores)

    expected_score = 0.9
    good_enough = self.approxEqualOrBetter(
        0.2, mean_score, expected_score, True)
    self.assertTrue(good_enough)
def testNew(self):
    # Smoke test: trains XCS with the inverse-variance option enabled on a
    # shuffled copy of the Multiplexer11 data set. The test only fails if
    # loading or training raises.

    # Use StringEnumerator to gather data. The path is anchored at THIS_DIR,
    # like every other test in this file, instead of being bare.
    dataPath = os.path.join(THIS_DIR, "test/DataSets/Real/Multiplexer11.csv")
    converter = StringEnumerator(dataPath, "class")
    headers, actionLabel, dataFeatures, dataActions = converter.get_params()

    # Shuffle data: append the actions as a last column so rows and labels
    # are permuted together, then split them apart again.
    formatted = np.insert(dataFeatures, dataFeatures.shape[1], dataActions, 1)
    np.random.shuffle(formatted)
    dataFeatures = np.delete(formatted, -1, axis=1)
    dataActions = formatted[:, -1]

    # Initialize and train model. "use_inverse_varinance" is the keyword
    # exactly as this test has always passed it to XCS.
    clf_inv_var = XCS(learning_iterations=1000,
                      N=200,
                      use_inverse_varinance=True)
    clf_inv_var.fit(dataFeatures, dataActions)
    # The stray breakpoint() that used to end this test was removed: it
    # dropped every run into the debugger and blocked unattended test runs.
def testget_params2(self):
    # Renames every header and the class, registers explicit converters for
    # the two string columns plus a random class converter, converts, and
    # then checks all four values returned by get_params. The expected
    # feature array contains nan entries, hence allclose with equal_nan.
    csv_path = os.path.join(THIS_DIR, "test/DataSets/Tests/StringData.csv")
    enumerator = StringEnumerator(csv_path, "phenotype")

    enumerator.change_header_name("N1", "gender")
    enumerator.change_header_name("N2", "floats")
    enumerator.change_header_name("N3", "age")
    enumerator.change_class_name("country")

    gender_values = np.array(["female", "male", "NA", "other"])
    age_values = np.array(["old", "young"])
    enumerator.add_attribute_converter("gender", gender_values)
    enumerator.add_attribute_converter("age", age_values)
    enumerator.add_class_converter_random()
    enumerator.convert_all_attributes()

    params = enumerator.get_params()
    actual_headers, actual_label, actual_features, actual_phenotypes = params

    expected_headers = np.array(["gender", "floats", "age"])
    expected_features = np.array([
        [1, 1.2, 1],
        [0, 0.3, np.nan],
        [0, -0.4, 0],
        [np.nan, 0, 1],
    ])
    expected_phenotypes = np.array([0, 1, 0, 2])

    self.assertEqual("country", actual_label)
    self.assertTrue(np.array_equal(expected_headers, actual_headers))
    self.assertTrue(
        np.allclose(expected_features, actual_features, equal_nan=True))
    self.assertTrue(
        np.allclose(expected_phenotypes, actual_phenotypes, equal_nan=True))