def testDeleteNonexistentAttribute(self):
     """delete_attribute must raise when asked to remove an unknown header.

     The test passes the header name "N" and expects an exception whose
     text contains "Header Doesn't Exist".
     """
     dataPath = os.path.join(THIS_DIR, "test/DataSets/Tests/StringData.csv")
     se = StringEnumerator(dataPath, "phenotype")
     with self.assertRaises(Exception) as context:
         se.delete_attribute("N")
     # assertIn shows the expected fragment and the actual message on failure,
     # whereas assertTrue(... in ...) only reports "False is not true".
     self.assertIn("Header Doesn't Exist", str(context.exception))
    def testDeleteInstancesWithMissing2(self):
        """Delete instances per header, convert everything, and verify state.

        N1 is renamed to "gender"; delete_all_instances_without_header_data is
        then applied to "gender", "N2" and "N3" (presumably dropping rows that
        have no value for that header). After enumerating the string columns
        and the class, the headers, class label, feature array, phenotype
        array and conversion map are compared with the expected values.
        """
        dataPath = os.path.join(THIS_DIR, "test/DataSets/Tests/StringData.csv")
        se = StringEnumerator(dataPath, "phenotype")
        se.change_header_name("N1", "gender")
        for header in ("gender", "N2", "N3"):
            se.delete_all_instances_without_header_data(header)

        se.add_attribute_converter_random("gender")
        se.add_attribute_converter_random("N3")
        se.add_class_converter_random()
        se.convert_all_attributes()

        cHeaders = np.array(["gender", "N2", "N3"])
        cMap = {
            "phenotype": {
                "china": "0"
            },
            "gender": {
                "male": "0",
                "female": "1"
            },
            "N3": {
                "young": "0",
                "old": "1"
            }
        }
        cArray = np.array([["0", "1.2", "0"], ["1", "-0.4", "1"]])
        cPArray = np.array(["0", "0"])
        self.assertTrue(np.array_equal(cHeaders, se.dataHeaders))
        # The class label is a plain string, so compare it directly.
        self.assertEqual("phenotype", se.classLabel)
        self.assertTrue(np.array_equal(cArray, se.dataFeatures))
        self.assertTrue(np.array_equal(cPArray, se.dataPhenotypes))
        # assertEqual prints a dict diff on failure instead of "False is not true".
        self.assertEqual(se.map, cMap)
    def testChangeClassAndHeaderNames2(self):
        """Rename headers around converter registration and verify the map keys.

        Converters are added before and after renames, including repeated
        calls and a call for "gender" after it has been renamed to "Gender".
        The expected map contains only "phenotype", "Gender" and "Age", so the
        test expects map entries to follow renames and the redundant calls to
        leave the map unchanged.
        """
        dataPath = os.path.join(THIS_DIR, "test/DataSets/Tests/StringData.csv")
        se = StringEnumerator(dataPath, "phenotype")
        se.add_class_converter_random()
        se.change_header_name("N1", "gender")
        se.add_attribute_converter_random("gender")
        se.change_header_name("gender", "Gender")
        # Repeated / stale-name registrations: expected to have no visible effect.
        se.add_attribute_converter_random("Gender")
        se.add_attribute_converter_random("Gender")
        se.add_attribute_converter_random("gender")
        se.add_attribute_converter_random("N3")
        se.change_header_name("N3", "Age")

        cHeaders = np.array(["Gender", "N2", "Age"])
        cMap = {
            "phenotype": {
                "china": "0",
                "japan": "1",
                "russia": "2"
            },
            "Gender": {
                "male": "0",
                "female": "1"
            },
            "Age": {
                "young": "0",
                "old": "1"
            }
        }
        self.assertTrue(np.array_equal(cHeaders, se.dataHeaders))
        # The class label is a plain string, so compare it directly.
        self.assertEqual("phenotype", se.classLabel)
        # assertEqual prints a dict diff on failure instead of "False is not true".
        self.assertEqual(se.map, cMap)
 def testInitHeaders(self):
     """Construction should expose N1..N4 as dataHeaders for MissingFeatureData.csv."""
     csv_path = os.path.join(THIS_DIR,
                             "test/DataSets/Tests/MissingFeatureData.csv")
     enumerator = StringEnumerator(csv_path, "phenotype")
     expected_headers = np.array(["N1", "N2", "N3", "N4"])
     headers_match = np.array_equal(expected_headers, enumerator.dataHeaders)
     self.assertTrue(headers_match)
 def testget_paramsFail(self):
     """get_params must raise while string data has not been enumerated.

     No converters are registered before the call; the exception text is
     expected to contain "Features and Phenotypes must be fully numeric".
     """
     dataPath = os.path.join(THIS_DIR, "test/DataSets/Tests/StringData.csv")
     se = StringEnumerator(dataPath, "phenotype")
     with self.assertRaises(Exception) as context:
         se.get_params()
     # assertIn shows the expected fragment and the actual message on failure.
     self.assertIn("Features and Phenotypes must be fully numeric",
                   str(context.exception))
    def testNumericCheck(self):
        """Track check_is_full_numeric as converters are applied step by step.

        For StringData.csv the check is expected to stay False until N1, N3
        and the class have all been converted. MissingFeatureData.csv is
        expected to be reported as fully numeric without any conversion.
        """
        string_csv = os.path.join(THIS_DIR, "test/DataSets/Tests/StringData.csv")
        enumerator = StringEnumerator(string_csv, "phenotype")

        # Nothing converted yet.
        self.assertFalse(enumerator.check_is_full_numeric())

        # Only N1 converted: still not fully numeric.
        enumerator.add_attribute_converter_random("N1")
        enumerator.convert_all_attributes()
        self.assertFalse(enumerator.check_is_full_numeric())

        # N3 and the class converted as well.
        enumerator.add_attribute_converter_random("N3")
        enumerator.add_class_converter_random()
        enumerator.convert_all_attributes()
        self.assertTrue(enumerator.check_is_full_numeric())

        missing_csv = os.path.join(THIS_DIR,
                                   "test/DataSets/Tests/MissingFeatureData.csv")
        numeric_enumerator = StringEnumerator(missing_csv, "phenotype")
        self.assertTrue(numeric_enumerator.check_is_full_numeric())
 def testchange_header_nameInvalid2(self):
     """change_header_name must raise when the current header is unknown.

     The test renames "N" to "N5" and expects an exception whose text
     contains "Current Header Doesn't Exist".
     """
     dataPath = os.path.join(THIS_DIR, "test/DataSets/Tests/StringData.csv")
     se = StringEnumerator(dataPath, "phenotype")
     with self.assertRaises(Exception) as context:
         se.change_header_name("N", "N5")
     # assertIn shows the expected fragment and the actual message on failure.
     self.assertIn("Current Header Doesn't Exist", str(context.exception))
예제 #8
0
 def test20BitMP5000Iterations(self):
     """Fit XCS on Multiplexer20Modified.csv and check the training accuracy.

     The final training accuracy is handed to self.approxEqualOrBetter
     together with the reference value 0.6634 and the arguments 0.2 and True
     (the helper is defined elsewhere; presumably a tolerance comparison).
     """
     csv_path = os.path.join(THIS_DIR, "test/DataSets/Real/Multiplexer20Modified.csv")
     enumerator = StringEnumerator(csv_path, "Class")
     _, _, features, phenotypes = enumerator.get_params()

     model = XCS(learning_iterations=5000, N=2000, nu=10)
     model.fit(features, phenotypes)

     reference_accuracy = 0.6634
     within_tolerance = self.approxEqualOrBetter(
         0.2, model.get_final_training_accuracy(), reference_accuracy, True)
     self.assertTrue(within_tolerance)
예제 #9
0
 def testContValues5000Iterations(self):
     """Fit XCS on ContinuousAndNonBinaryDiscreteAttributes.csv and check accuracy.

     The final training accuracy is handed to self.approxEqualOrBetter
     together with the reference value 0.64 and the arguments 0.2 and True
     (the helper is defined elsewhere; presumably a tolerance comparison).
     """
     csv_path = os.path.join(
         THIS_DIR, "test/DataSets/Real/ContinuousAndNonBinaryDiscreteAttributes.csv")
     enumerator = StringEnumerator(csv_path, "Class")
     _, _, features, phenotypes = enumerator.get_params()

     model = XCS(learning_iterations=5000)
     model.fit(features, phenotypes)

     reference_accuracy = 0.64
     within_tolerance = self.approxEqualOrBetter(
         0.2, model.get_final_training_accuracy(), reference_accuracy, True)
     self.assertTrue(within_tolerance)
    def testchange_header_nameInvalid(self):
        """change_header_name must raise when the new name is already taken.

        The test renames "N1" to "N2" and expects an exception whose text
        contains the "New Class Name Cannot Be An Already Existing ..." message.
        """
        dataPath = os.path.join(THIS_DIR, "test/DataSets/Tests/StringData.csv")
        se = StringEnumerator(dataPath, "phenotype")
        with self.assertRaises(Exception) as context:
            se.change_header_name("N1", "N2")

        # assertIn shows the expected fragment and the actual message on failure.
        self.assertIn(
            "New Class Name Cannot Be An Already Existing Data Header or Phenotype Name",
            str(context.exception))
 def testInitFeaturesAndClassRemoval(self):
     """Check feature and class arrays built from a file with missing phenotypes.

     For MissingFeatureAndPhenotypeData.csv the test expects three instances
     to remain, with missing feature values represented as "NA".
     """
     csv_path = os.path.join(
         THIS_DIR, "test/DataSets/Tests/MissingFeatureAndPhenotypeData.csv")
     enumerator = StringEnumerator(csv_path, "phenotype")

     expected_rows = [
         ["1.0", "NA", "1.0", "4.0"],
         ["NA", "1.0", "NA", "1.0"],
         ["6.0", "NA", "1.0", "1.0"],
     ]
     expected_features = np.array(expected_rows)
     expected_classes = np.array(["1.0", "0.0", "1.0"])

     features_match = np.array_equal(expected_features, enumerator.dataFeatures)
     self.assertTrue(features_match)
     classes_match = np.array_equal(expected_classes, enumerator.dataPhenotypes)
     self.assertTrue(classes_match)
 def testInitMissingData(self):
     """Check that construction represents missing feature values as "NA".

     The expected feature array for MissingFeatureData.csv has five rows of
     four string values each.
     """
     csv_path = os.path.join(THIS_DIR,
                             "test/DataSets/Tests/MissingFeatureData.csv")
     enumerator = StringEnumerator(csv_path, "phenotype")

     expected_rows = [
         ["1.0", "NA", "1.0", "4.0"],
         ["2.0", "0.0", "1.0", "NA"],
         ["4.0", "NA", "1.0", "2.0"],
         ["NA", "1.0", "NA", "1.0"],
         ["6.0", "NA", "1.0", "1.0"],
     ]
     expected_features = np.array(expected_rows)
     features_match = np.array_equal(expected_features, enumerator.dataFeatures)
     self.assertTrue(features_match)
 def testChangeClassAndHeaderNames(self):
     """Rename the class and headers, reusing names that have been freed.

     The class is renamed to "country" first, so "phenotype" can be used as
     an intermediate header name; likewise "N1" is reused after the original
     N1 has become "gender". The final headers and class label are checked.
     """
     dataPath = os.path.join(THIS_DIR, "test/DataSets/Tests/StringData.csv")
     se = StringEnumerator(dataPath, "phenotype")
     se.change_class_name("country")
     se.change_header_name("N1", "gender")
     se.change_header_name("N2", "N1")
     se.change_header_name("N1", "floats")
     se.change_header_name("N3", "phenotype")
     se.change_header_name("phenotype", "age")
     cHeaders = np.array(["gender", "floats", "age"])
     self.assertTrue(np.array_equal(cHeaders, se.dataHeaders))
     # The class label is a plain string; assertEqual reports both values on
     # failure, unlike assertTrue(np.array_equal(...)).
     self.assertEqual("country", se.classLabel)
 def testdelete_attribute(self):
     """Delete an attribute that has a converter and verify headers and map.

     Converters are registered for "gender" (renamed from N1) and "N3";
     after deleting "gender" the test expects only N2 and N3 as headers and
     only the N3 entry in the conversion map.
     """
     dataPath = os.path.join(THIS_DIR, "test/DataSets/Tests/StringData.csv")
     se = StringEnumerator(dataPath, "phenotype")
     se.change_header_name("N1", "gender")
     se.add_attribute_converter_random("gender")
     se.add_attribute_converter_random("N3")
     se.delete_attribute("gender")
     cHeaders = np.array(["N2", "N3"])
     cMap = {"N3": {"young": "0", "old": "1"}}
     self.assertTrue(np.array_equal(cHeaders, se.dataHeaders))
     # The class label is a plain string, so compare it directly.
     self.assertEqual("phenotype", se.classLabel)
     # assertEqual prints a dict diff on failure instead of "False is not true".
     self.assertEqual(se.map, cMap)
예제 #15
0
 def testPredictInvVar(self):
     """Smoke test: fit XCS with the inverse-variance option, then predict once.

     No assertion is made; the test only fails if fitting or predicting
     raises. The prediction for the first saved raw training row is printed.
     """
     dataPath = os.path.join(THIS_DIR,
                             "test/DataSets/Real/Multiplexer6Modified.csv")
     converter = StringEnumerator(dataPath, "Class")
     _, _, dataFeatures, dataPhenotypes = converter.get_params()
     # NOTE(review): "use_inverse_varinance" looks misspelled; it is kept
     # as-is because it is presumably the constructor's actual keyword --
     # confirm against the XCS signature.
     clf = XCS(learning_iterations=1000,
               N=500,
               nu=10,
               use_inverse_varinance=True,
               p_explore=0.5)
     clf.fit(dataFeatures, dataPhenotypes)
     print(clf.predict(clf.env.formatData.savedRawTrainingData[0]))
예제 #16
0
 def testInverseVariance(self):
     """Fit XCS with "inv-var-only-mixing" on Multiplexer11.csv and report accuracy.

     The final training accuracy is printed next to the reference value.
     NOTE(review): no assertion is made on the score, so the test only fails
     if fitting raises -- consider asserting against the reference once the
     expected accuracy for this mixing method is confirmed.
     """
     dataPath = os.path.join(THIS_DIR,
                             "test/DataSets/Real/Multiplexer11.csv")
     converter = StringEnumerator(dataPath, "class")
     _, _, dataFeatures, dataPhenotypes = converter.get_params()
     clf = XCS(learning_iterations=5000,
               N=1000,
               mixing_method="inv-var-only-mixing")
     clf.fit(dataFeatures, dataPhenotypes)
     answer = 0.894
     score = clf.get_final_training_accuracy()
     # Label matches the data set and iteration count actually used above.
     print("#####################################\n11 Bit 5000 Iter: " +
           str(score) + " (reference: " + str(answer) + ")")
예제 #17
0
    def testContValuesAndMissingTesting5000Iterations(self):
        """Cross-validate XCS on the continuous/missing data set after shuffling.

        Features and phenotypes are joined into one array so rows can be
        shuffled together, then split apart again. The mean 3-fold
        cross_val_score is handed to self.approxEqualOrBetter with the
        reference value 0.5 and the arguments 0.2 and True.
        """
        csv_path = os.path.join(
            THIS_DIR,
            "test/DataSets/Real/ContinuousAndNonBinaryDiscreteAttributesMissing.csv")
        enumerator = StringEnumerator(csv_path, "Class")
        _, _, features, phenotypes = enumerator.get_params()

        # Append the phenotype as the last column so each row keeps its label.
        last_column = features.shape[1]
        combined = np.insert(features, last_column, phenotypes, 1)
        np.random.shuffle(combined)
        shuffled_phenotypes = combined[:, -1]
        shuffled_features = np.delete(combined, -1, axis=1)

        model = XCS(learning_iterations=5000)
        fold_scores = cross_val_score(model, shuffled_features, shuffled_phenotypes, cv=3)
        mean_score = np.mean(fold_scores)

        reference_score = 0.5
        within_tolerance = self.approxEqualOrBetter(0.2, mean_score, reference_score, True)
        self.assertTrue(within_tolerance)
예제 #18
0
    def test6BitMPTesting1000Iterations(self):
        """Cross-validate XCS on Multiplexer6Modified.csv after shuffling rows.

        Features and phenotypes are joined into one array so rows can be
        shuffled together, then split apart again. The mean 3-fold
        cross_val_score is handed to self.approxEqualOrBetter with the
        reference value 0.9 and the arguments 0.2 and True.
        """
        csv_path = os.path.join(THIS_DIR, "test/DataSets/Real/Multiplexer6Modified.csv")
        enumerator = StringEnumerator(csv_path, "Class")
        _, _, features, phenotypes = enumerator.get_params()

        # Append the phenotype as the last column so each row keeps its label.
        last_column = features.shape[1]
        combined = np.insert(features, last_column, phenotypes, 1)
        np.random.shuffle(combined)
        shuffled_phenotypes = combined[:, -1]
        shuffled_features = np.delete(combined, -1, axis=1)

        model = XCS(learning_iterations=1000, N=500, nu=10)
        fold_scores = cross_val_score(model, shuffled_features, shuffled_phenotypes, cv=3)
        mean_score = np.mean(fold_scores)

        reference_score = 0.9
        within_tolerance = self.approxEqualOrBetter(0.2, mean_score, reference_score, True)
        self.assertTrue(within_tolerance)
예제 #19
0
    def testNew(self):
        """Smoke test: shuffle Multiplexer11.csv and fit XCS with inverse variance.

        No assertion is made; the test only fails if loading or fitting raises.
        """
        # Use StringEnumerator to gather data.
        # NOTE(review): this path is relative to the current working directory.
        converter = StringEnumerator("test/DataSets/Real/Multiplexer11.csv",
                                     "class")
        _, _, dataFeatures, dataActions = converter.get_params()

        # Shuffle data: append the actions as a last column so rows stay
        # paired with their label, shuffle, then split again.
        formatted = np.insert(dataFeatures, dataFeatures.shape[1], dataActions,
                              1)
        np.random.shuffle(formatted)
        dataFeatures = np.delete(formatted, -1, axis=1)
        dataActions = formatted[:, -1]

        # Initialize and train model.
        # NOTE(review): "use_inverse_varinance" looks misspelled; it is kept
        # as-is because it is presumably the constructor's actual keyword.
        clf_inv_var = XCS(learning_iterations=1000,
                          N=200,
                          use_inverse_varinance=True)
        clf_inv_var.fit(dataFeatures, dataActions)
        # The stray breakpoint() that followed was removed: it drops
        # non-interactive test runs into the debugger.
 def testget_params2(self):
     """Rename everything, convert with explicit value orders, check get_params.

     Headers become gender/floats/age and the class becomes "country".
     "gender" and "age" use explicitly supplied value arrays, the class uses
     the random converter. The values returned by get_params are compared
     with expected numeric arrays, with NaN expected where the test data has
     no value.
     """
     csv_path = os.path.join(THIS_DIR, "test/DataSets/Tests/StringData.csv")
     enumerator = StringEnumerator(csv_path, "phenotype")

     for old_name, new_name in (("N1", "gender"), ("N2", "floats"), ("N3", "age")):
         enumerator.change_header_name(old_name, new_name)
     enumerator.change_class_name("country")

     gender_values = np.array(["female", "male", "NA", "other"])
     enumerator.add_attribute_converter("gender", gender_values)
     age_values = np.array(["old", "young"])
     enumerator.add_attribute_converter("age", age_values)
     enumerator.add_class_converter_random()
     enumerator.convert_all_attributes()

     params = enumerator.get_params()
     actual_headers, actual_label, actual_features, actual_phenotypes = params

     expected_headers = np.array(["gender", "floats", "age"])
     expected_features = np.array([
         [1, 1.2, 1],
         [0, 0.3, np.nan],
         [0, -0.4, 0],
         [np.nan, 0, 1],
     ])
     expected_phenotypes = np.array([0, 1, 0, 2])

     self.assertEqual("country", actual_label)
     self.assertTrue(np.array_equal(expected_headers, actual_headers))
     features_close = np.allclose(expected_features, actual_features,
                                  equal_nan=True)
     self.assertTrue(features_close)
     phenotypes_close = np.allclose(expected_phenotypes, actual_phenotypes,
                                    equal_nan=True)
     self.assertTrue(phenotypes_close)