예제 #1
0
def runScikiteLCS(dataFile,
                  classLabel,
                  learningIterations,
                  randomSeed,
                  cv=False):
    """Train an eLCS model on a CSV dataset and report how it performed.

    Args:
        dataFile: CSV source handed to ``pd.read_csv``.
        classLabel: Name of the column that holds the class values; every
            other column is used as a feature.
        learningIterations: Forwarded to ``skeLCS.eLCS``.
        randomSeed: Forwarded to ``skeLCS.eLCS`` and also used to seed the
            ``random`` module.
        cv: Compared with ``== False``, so ``False`` and ``0`` select the
            plain fit/score path; any other value is forwarded to
            ``cross_val_score`` as its ``cv`` argument.

    Returns:
        Without cross-validation, a 1-D numpy array holding the training
        score followed by the model timer's ``globalDeletion``,
        ``globalEvaluation``, ``globalMatching``, ``globalSelection``,
        ``globalSubsumption`` and ``globalTime`` values. With
        cross-validation, the mean of the ``cross_val_score`` results.
    """
    data = pd.read_csv(dataFile)
    dataFeatures = data.drop(classLabel, axis=1).values
    dataPhenotypes = data[classLabel].values
    model = skeLCS.eLCS(learningIterations=learningIterations,
                        randomSeed=randomSeed)
    random.seed(randomSeed)

    # Deliberately ``==`` rather than ``not cv``: cv=None must still reach
    # cross_val_score, where it means "use the default splitter".
    if cv == False:  # noqa: E712
        model.fit(dataFeatures, dataPhenotypes)
        score = model.score(dataFeatures, dataPhenotypes)
        timer = model.timer
        return np.array([
            score, timer.globalDeletion, timer.globalEvaluation,
            timer.globalMatching, timer.globalSelection,
            timer.globalSubsumption, timer.globalTime
        ])

    # Glue the class values on as a trailing column so that features and
    # labels are reordered together.
    formatted = np.insert(dataFeatures, dataFeatures.shape[1],
                          dataPhenotypes, 1)
    # random.shuffle() must not be applied to a 2-D ndarray: its element swap
    # operates on row *views*, which silently duplicates some rows and drops
    # others. Shuffle a plain list of row indices instead and reindex.
    rowOrder = list(range(len(formatted)))
    random.shuffle(rowOrder)
    formatted = formatted[rowOrder]
    dataFeatures = np.delete(formatted, -1, axis=1)
    dataPhenotypes = formatted[:, -1]
    return np.mean(
        cross_val_score(model, dataFeatures, dataPhenotypes, cv=cv))
예제 #2
0
 def testContSpec(self):
     """Fit with discrete_attribute_limit="c" and specified attributes 0, 2, 3.

     Expects attributeInfoType to equal [True, False, True] and the
     phenotype to be reported as discrete.
     """
     csvPath = os.path.join(THIS_DIR, "test/DataSets/Tests/SpecificityTests/Specifics.csv")
     _headers, _label, features, phenotypes = StringEnumerator(csvPath, "class").get_params()
     model = eLCS(
         learning_iterations=0,
         discrete_attribute_limit="c",
         specified_attributes=np.array([0, 2, 3]),
     )
     model.fit(features, phenotypes)
     expectedTypes = [True, False, True]
     self.assertEqual(model.env.formatData.attributeInfoType, expectedTypes)
     self.assertTrue(model.env.formatData.discretePhenotype)
예제 #3
0
 def test11BitMultiplexer5000Iterations(self):
     """Train 5000 iterations on Multiplexer11.csv and check the final accuracy against 1."""
     csvPath = os.path.join(THIS_DIR, "test/DataSets/Real/Multiplexer11.csv")
     _headers, _label, features, phenotypes = StringEnumerator(csvPath, "class").get_params()
     model = eLCS(learning_iterations=5000)
     model.fit(features, phenotypes)
     expectedAccuracy = 1
     closeEnough = self.approxEqualOrBetter(
         0.2, model.get_final_accuracy(), expectedAccuracy, True)
     self.assertTrue(closeEnough)
예제 #4
0
 def testContinuous1000Iterations(self):
     """Train 1000 iterations on the continuous/non-binary dataset and check accuracy against 0.61."""
     csvPath = os.path.join(THIS_DIR, "test/DataSets/Real/ContinuousAndNonBinaryDiscreteAttributes.csv")
     _headers, _label, features, phenotypes = StringEnumerator(csvPath, "Class").get_params()
     model = eLCS(learning_iterations=1000)
     model.fit(features, phenotypes)
     expectedAccuracy = 0.61
     closeEnough = self.approxEqualOrBetter(
         0.2, model.get_final_accuracy(), expectedAccuracy, True)
     self.assertTrue(closeEnough)
예제 #5
0
 def testFullFloatsMissing(self):
     """Fit for 2 iterations on numericFloatsMissing.csv and check the iteration counter."""
     dataPath = os.path.join(THIS_DIR, "test/DataSets/Tests/NumericTests/numericFloatsMissing.csv")
     converter = StringEnumerator(dataPath, "class")
     headers, classLabel, dataFeatures, dataPhenotypes = converter.get_params()
     clf = eLCS(learning_iterations=2)
     clf.fit(dataFeatures, dataPhenotypes)
     # The previous assertTrue(clf.explorIter, 2) only checked truthiness:
     # assertTrue's second positional argument is the failure message, so the
     # value 2 was never compared against the counter.
     self.assertEqual(clf.explorIter, 2)
예제 #6
0
 def testMPCV(self):
     """3-fold cross-validate a 2000-iteration model on Multiplexer6.csv; mean score is compared to 0.8452."""
     csvPath = os.path.join(THIS_DIR, "test/DataSets/Real/Multiplexer6.csv")
     _headers, _label, features, phenotypes = StringEnumerator(csvPath, "class").get_params()
     model = eLCS(learning_iterations=2000)

     # Attach the phenotypes as a trailing column so each row keeps its label
     # while the rows are shuffled in place.
     labelColumn = features.shape[1]
     combined = np.insert(features, labelColumn, phenotypes, 1)
     np.random.shuffle(combined)
     features = np.delete(combined, -1, axis=1)
     phenotypes = combined[:, -1]

     foldScores = cross_val_score(model, features, phenotypes, cv=3)
     meanScore = np.mean(foldScores)
     self.assertTrue(self.approxEqual(0.3, meanScore, 0.8452))
예제 #7
0
 def testContMissing(self):
     """Cross-validate a 5000-iteration model on the ...AttributesMissing.csv dataset; mean score is compared to 0.6355."""
     csvPath = os.path.join(THIS_DIR, "test/DataSets/Real/ContinuousAndNonBinaryDiscreteAttributesMissing.csv")
     _headers, _label, features, phenotypes = StringEnumerator(csvPath, "Class").get_params()
     model = eLCS(learning_iterations=5000)

     # Attach the phenotypes as a trailing column so each row keeps its label
     # while the rows are shuffled in place.
     labelColumn = features.shape[1]
     combined = np.insert(features, labelColumn, phenotypes, 1)
     np.random.shuffle(combined)
     features = np.delete(combined, -1, axis=1)
     phenotypes = combined[:, -1]

     foldScores = cross_val_score(model, features, phenotypes)
     meanScore = np.mean(foldScores)
     self.assertTrue(self.approxEqual(0.2, meanScore, 0.6355))
예제 #8
0
    def testNANPhenotypeExists(self):
        """fit() must raise the "fully numeric" error once NaN cells are replaced by "NA"."""
        dataPath = os.path.join(THIS_DIR, "test/DataSets/Tests/NumericTests/numericFloatsNAN.csv")
        data = pd.read_csv(dataPath, sep=',')
        # Replace every NaN cell with the string "NA" before splitting.
        data = data.fillna("NA")
        dataFeatures = data.drop("class", axis=1).values  # all columns except "class"
        dataPhenotypes = data["class"].values
        clf = eLCS(learning_iterations=2)

        with self.assertRaises(Exception) as context:
            clf.fit(dataFeatures, dataPhenotypes)
        # assertIn reports both the expected fragment and the actual message
        # on failure, unlike assertTrue(... in ...).
        self.assertIn("X and y must be fully numeric", str(context.exception))
예제 #9
0
 def testParamThetaSub2(self):
     """theta_sub=5 given to the constructor is readable back from the instance."""
     expected = 5
     model = eLCS(theta_sub=expected)
     self.assertEqual(model.theta_sub, expected)
예제 #10
0
 def testParamN(self):
     """N=2000 given to the constructor is readable back from the instance."""
     expected = 2000
     model = eLCS(N=expected)
     self.assertEqual(model.N, expected)
예제 #11
0
 def testDoSub(self):
     """do_correct_set_subsumption=True is readable back from the instance."""
     expected = True
     model = eLCS(do_correct_set_subsumption=expected)
     self.assertEqual(model.do_correct_set_subsumption, expected)
예제 #12
0
 def testParamNInvalidNumeric2(self):
     """Constructing eLCS with N=-2 must raise with the N validation message."""
     with self.assertRaises(Exception) as context:
         eLCS(N=-2)
     # assertIn shows the actual message on failure; assertTrue(... in ...)
     # would only report "False is not true".
     self.assertIn("N param must be nonnegative integer", str(context.exception))
예제 #13
0
 def testDoSub2Invalid(self):
     """Constructing eLCS with do_GA_subsumption=2 must raise with the boolean validation message."""
     with self.assertRaises(Exception) as context:
         eLCS(do_GA_subsumption=2)
     # assertIn shows the actual message on failure; assertTrue(... in ...)
     # would only report "False is not true".
     self.assertIn("do_GA_subsumption param must be boolean", str(context.exception))
예제 #14
0
 def testDoSub2(self):
     """do_GA_subsumption=True is readable back from the instance."""
     expected = True
     model = eLCS(do_GA_subsumption=expected)
     self.assertEqual(model.do_GA_subsumption, expected)
예제 #15
0
 def testDiscreteAttributeLimitInv2(self):
     """Constructing eLCS with discrete_attribute_limit=-10 must raise with the validation message."""
     with self.assertRaises(Exception) as context:
         eLCS(discrete_attribute_limit=-10)
     # assertIn shows the actual message on failure; assertTrue(... in ...)
     # would only report "False is not true".
     self.assertIn("discrete_attribute_limit param must be nonnegative integer or 'c' or 'd'", str(context.exception))
예제 #16
0
 def testParamP_Spec2(self):
     """p_spec=0.3 given to the constructor is readable back from the instance."""
     expected = 0.3
     model = eLCS(p_spec=expected)
     self.assertEqual(model.p_spec, expected)
예제 #17
0
 def testRandomSeedInv2(self):
     """Constructing eLCS with random_state=1.2 must raise with the random_state validation message."""
     with self.assertRaises(Exception) as context:
         eLCS(random_state=1.2)
     # assertIn shows the actual message on failure; assertTrue(... in ...)
     # would only report "False is not true".
     self.assertIn("random_state param must be integer or None", str(context.exception))
예제 #18
0
 def testSelection2(self):
     """selection_method="roulette" is readable back from the instance."""
     expected = "roulette"
     model = eLCS(selection_method=expected)
     self.assertEqual(model.selection_method, expected)
예제 #19
0
 def testSelection1(self):
     """selection_method="tournament" is readable back from the instance."""
     expected = "tournament"
     model = eLCS(selection_method=expected)
     self.assertEqual(model.selection_method, expected)
예제 #20
0
 def testSelectionInvalid(self):
     """Constructing eLCS with selection_method="hello" must raise with the selection_method validation message."""
     with self.assertRaises(Exception) as context:
         eLCS(selection_method="hello")
     # assertIn shows the actual message on failure; assertTrue(... in ...)
     # would only report "False is not true".
     self.assertIn("selection_method param must be 'tournament' or 'roulette'", str(context.exception))
예제 #21
0
 def testParamThetaSub1(self):
     """theta_sub=0 given to the constructor is readable back from the instance."""
     expected = 0
     model = eLCS(theta_sub=expected)
     self.assertEqual(model.theta_sub, expected)
예제 #22
0
 def testParamThetaSubInv4(self):
     """Constructing eLCS with theta_sub=-5 must raise with the theta_sub validation message."""
     with self.assertRaises(Exception) as context:
         eLCS(theta_sub=-5)
     # assertIn shows the actual message on failure; assertTrue(... in ...)
     # would only report "False is not true".
     self.assertIn("theta_sub param must be nonnegative integer", str(context.exception))
예제 #23
0
 def testRandomSeed2(self):
     """random_state=200 given to the constructor is readable back from the instance."""
     expected = 200
     model = eLCS(random_state=expected)
     self.assertEqual(model.random_state, expected)
예제 #24
0
 def testParamNNonnumeric(self):
     """Constructing eLCS with N="hello" must raise with the N validation message."""
     with self.assertRaises(Exception) as context:
         eLCS(N="hello")
     # assertIn shows the actual message on failure; assertTrue(... in ...)
     # would only report "False is not true".
     self.assertIn("N param must be nonnegative integer", str(context.exception))
예제 #25
0
 def testParamP_SpecInv3(self):
     """Constructing eLCS with p_spec=-1.2 must raise with the p_spec validation message."""
     with self.assertRaises(Exception) as context:
         eLCS(p_spec=-1.2)
     # assertIn shows the actual message on failure; assertTrue(... in ...)
     # would only report "False is not true".
     self.assertIn("p_spec param must be float from 0 - 1", str(context.exception))
예제 #26
0
 def testRandomSeed3(self):
     """random_state=None given to the constructor is readable back from the instance."""
     expected = None
     model = eLCS(random_state=expected)
     self.assertEqual(model.random_state, expected)
예제 #27
0
 def testParamP_Spec3(self):
     """p_spec=1 given to the constructor is readable back from the instance."""
     expected = 1
     model = eLCS(p_spec=expected)
     self.assertEqual(model.p_spec, expected)
예제 #28
0
 def testRebootFilenameInv2(self):
     """Constructing eLCS with reboot_filename=True must raise with the reboot_filename validation message."""
     with self.assertRaises(Exception) as context:
         eLCS(reboot_filename=True)
     # assertIn shows the actual message on failure; assertTrue(... in ...)
     # would only report "False is not true".
     self.assertIn("reboot_filename param must be None or String from pickle", str(context.exception))
예제 #29
0
 def testRebootFilename2(self):
     """reboot_filename=None given to the constructor is readable back from the instance."""
     expected = None
     model = eLCS(reboot_filename=expected)
     self.assertEqual(model.reboot_filename, expected)
예제 #30
0
 def testParamThetaDel2(self):
     """theta_del=5 given to the constructor is readable back from the instance."""
     expected = 5
     model = eLCS(theta_del=expected)
     self.assertEqual(model.theta_del, expected)