def testget_paramsFail(self):
    """get_params() must raise while any feature/class is still non-numeric."""
    dataPath = os.path.join(THIS_DIR, "test/DataSets/Tests/StringData.csv")
    se = StringEnumerator(dataPath, "phenotype")
    with self.assertRaises(Exception) as context:
        se.get_params()
    # assertIn reports both strings on failure, unlike assertTrue("x" in y)
    self.assertIn("Features and Phenotypes must be fully numeric",
                  str(context.exception))
def testContValues5000Iterations(self):
    """Training accuracy on continuous/non-binary discrete attributes (5000 iters)."""
    csv_path = os.path.join(
        THIS_DIR,
        "test/DataSets/Real/ContinuousAndNonBinaryDiscreteAttributes.csv")
    enumerator = StringEnumerator(csv_path, "Class")
    headers, classLabel, features, phenotypes = enumerator.get_params()

    model = XCS(learning_iterations=5000)
    model.fit(features, phenotypes)

    expected = 0.64
    achieved = model.get_final_training_accuracy()
    #print("Continuous Attributes 5000 Iter: " + str(achieved))
    self.assertTrue(self.approxEqualOrBetter(0.2, achieved, expected, True))
def test20BitMP5000Iterations(self):
    """20-bit multiplexer training accuracy after 5000 iterations."""
    csv_path = os.path.join(THIS_DIR,
                            "test/DataSets/Real/Multiplexer20Modified.csv")
    enumerator = StringEnumerator(csv_path, "Class")
    headers, classLabel, features, phenotypes = enumerator.get_params()

    model = XCS(learning_iterations=5000, N=2000, nu=10)
    model.fit(features, phenotypes)

    expected = 0.6634
    achieved = model.get_final_training_accuracy()
    #print("20 Bit 5000 Iter: " + str(achieved))
    self.assertTrue(self.approxEqualOrBetter(0.2, achieved, expected, True))
def testPredictInvVar(self):
    """Smoke-test predict() when inverse-variance mode is enabled."""
    dataPath = os.path.join(THIS_DIR,
                            "test/DataSets/Real/Multiplexer6Modified.csv")
    converter = StringEnumerator(dataPath, "Class")
    headers, classLabel, dataFeatures, dataPhenotypes = converter.get_params()
    # NOTE(review): 'use_inverse_varinance' is spelled this way wherever it is
    # used in this suite — presumably a typo in the XCS keyword itself; confirm
    # against the library before renaming.
    clf = XCS(learning_iterations=1000, N=500, nu=10,
              use_inverse_varinance=True, p_explore=0.5)
    clf.fit(dataFeatures, dataPhenotypes)
    # Replaced leftover debug prints ("kkkk...") with a real assertion:
    # predict() on a saved training instance must yield a prediction.
    prediction = clf.predict(clf.env.formatData.savedRawTrainingData[0])
    self.assertIsNotNone(prediction)
def testInverseVariance(self):
    """11-bit multiplexer with inverse-variance-only mixing (5000 iterations)."""
    dataPath = os.path.join(THIS_DIR, "test/DataSets/Real/Multiplexer11.csv")
    converter = StringEnumerator(dataPath, "class")
    headers, classLabel, dataFeatures, dataPhenotypes = converter.get_params()
    clf = XCS(learning_iterations=5000, N=1000,
              mixing_method="inv-var-only-mixing")
    clf.fit(dataFeatures, dataPhenotypes)
    answer = 0.894
    score = clf.get_final_training_accuracy()
    # Was print-only and asserted nothing; now checks the score like the
    # other accuracy tests in this suite do.
    self.assertTrue(self.approxEqualOrBetter(0.2, score, answer, True))
def testContValuesAndMissingTesting5000Iterations(self):
    """3-fold CV score on continuous data with missing values (5000 iterations)."""
    csv_path = os.path.join(
        THIS_DIR,
        "test/DataSets/Real/ContinuousAndNonBinaryDiscreteAttributesMissing.csv")
    enumerator = StringEnumerator(csv_path, "Class")
    headers, classLabel, features, phenotypes = enumerator.get_params()

    # Shuffle features and phenotypes together so rows stay aligned
    combined = np.insert(features, features.shape[1], phenotypes, 1)
    np.random.shuffle(combined)
    features = np.delete(combined, -1, axis=1)
    phenotypes = combined[:, -1]

    model = XCS(learning_iterations=5000)
    cv_score = np.mean(cross_val_score(model, features, phenotypes, cv=3))
    expected = 0.5
    #print("Cont & Missing Testing 5000 Iter: " + str(cv_score))
    self.assertTrue(self.approxEqualOrBetter(0.2, cv_score, expected, True))
def test6BitMPTesting1000Iterations(self):
    """3-fold CV score on the 6-bit multiplexer after 1000 iterations."""
    csv_path = os.path.join(THIS_DIR,
                            "test/DataSets/Real/Multiplexer6Modified.csv")
    enumerator = StringEnumerator(csv_path, "Class")
    headers, classLabel, features, phenotypes = enumerator.get_params()

    # Shuffle features and phenotypes together so rows stay aligned
    combined = np.insert(features, features.shape[1], phenotypes, 1)
    np.random.shuffle(combined)
    features = np.delete(combined, -1, axis=1)
    phenotypes = combined[:, -1]

    model = XCS(learning_iterations=1000, N=500, nu=10)
    cv_score = np.mean(cross_val_score(model, features, phenotypes, cv=3))
    expected = 0.9
    #print("6 Bit Testing 1000 Iter: " + str(cv_score))
    self.assertTrue(self.approxEqualOrBetter(0.2, cv_score, expected, True))
def testNew(self):
    """Train XCS with inverse-variance fitness on shuffled 11-bit multiplexer data."""
    # Use StringEnumerator to gather data. Path is joined with THIS_DIR for
    # consistency with the other tests, so the test works from any CWD.
    dataPath = os.path.join(THIS_DIR, "test/DataSets/Real/Multiplexer11.csv")
    converter = StringEnumerator(dataPath, "class")
    headers, actionLabel, dataFeatures, dataActions = converter.get_params()

    # Shuffle features and actions together so rows stay aligned
    formatted = np.insert(dataFeatures, dataFeatures.shape[1], dataActions, 1)
    np.random.shuffle(formatted)
    dataFeatures = np.delete(formatted, -1, axis=1)
    dataActions = formatted[:, -1]

    # Initialize and train model
    clf_inv_var = XCS(learning_iterations=1000, N=200,
                      use_inverse_varinance=True)
    clf_inv_var.fit(dataFeatures, dataActions)
    # Replaced a leftover breakpoint() — which would hang any automated
    # test run — with a sanity assertion on the trained model's accuracy.
    accuracy = clf_inv_var.get_final_training_accuracy()
    self.assertTrue(0.0 <= accuracy <= 1.0)
def testget_params2(self):
    """get_params() returns renamed headers and converted numeric data."""
    csv_path = os.path.join(THIS_DIR, "test/DataSets/Tests/StringData.csv")
    se = StringEnumerator(csv_path, "phenotype")

    # Rename headers and the class column, then register value converters
    se.change_header_name("N1", "gender")
    se.change_header_name("N2", "floats")
    se.change_header_name("N3", "age")
    se.change_class_name("country")
    se.add_attribute_converter("gender",
                               np.array(["female", "male", "NA", "other"]))
    se.add_attribute_converter("age", np.array(["old", "young"]))
    se.add_class_converter_random()
    se.convert_all_attributes()

    dataHeaders, classLabel, dataFeatures, dataPhenotypes = se.get_params()

    expected_headers = np.array(["gender", "floats", "age"])
    expected_features = np.array([[1, 1.2, 1],
                                  [0, 0.3, np.nan],
                                  [0, -0.4, 0],
                                  [np.nan, 0, 1]])
    expected_phenotypes = np.array([0, 1, 0, 2])

    self.assertEqual("country", classLabel)
    self.assertTrue(np.array_equal(expected_headers, dataHeaders))
    self.assertTrue(np.allclose(expected_features, dataFeatures,
                                equal_nan=True))
    self.assertTrue(np.allclose(expected_phenotypes, dataPhenotypes,
                                equal_nan=True))