def runScikiteLCS(dataFile, classLabel, learningIterations, randomSeed, cv=False):
    """Train an eLCS model on a CSV file and report its score.

    Args:
        dataFile: Path (or anything ``pd.read_csv`` accepts) of the dataset.
        classLabel: Name of the column holding the phenotype/class.
        learningIterations: Forwarded to ``skeLCS.eLCS`` as ``learningIterations``.
        randomSeed: Forwarded to ``skeLCS.eLCS`` and also used to seed the
            ``random`` module that drives the row shuffle below.
        cv: ``False`` (default) to fit and score on the full dataset; any
            other value is forwarded to ``cross_val_score`` as ``cv``.

    Returns:
        Without cross-validation, a NumPy array holding the score followed by
        the model timer's deletion, evaluation, matching, selection,
        subsumption and total time values. With cross-validation, the mean of
        the scores returned by ``cross_val_score``.
    """
    data = pd.read_csv(dataFile)
    dataFeatures = data.drop(classLabel, axis=1).values
    dataPhenotypes = data[classLabel].values

    model = skeLCS.eLCS(learningIterations=learningIterations, randomSeed=randomSeed)
    random.seed(randomSeed)

    # ``== False`` is kept on purpose: it also routes cv=0 here while letting
    # cv=None fall through to cross_val_score, exactly as callers expect.
    if cv == False:  # noqa: E712
        model.fit(dataFeatures, dataPhenotypes)
        score = model.score(dataFeatures, dataPhenotypes)
        timer = model.timer
        return np.array([
            score,
            timer.globalDeletion,
            timer.globalEvaluation,
            timer.globalMatching,
            timer.globalSelection,
            timer.globalSubsumption,
            timer.globalTime,
        ])

    # Glue the phenotype on as the last column so rows are permuted together.
    formatted = np.insert(dataFeatures, dataFeatures.shape[1], dataPhenotypes, 1)

    # random.shuffle must not be applied to a 2-D ndarray directly: its
    # pairwise swap operates on row *views*, which overwrites rows and leaves
    # duplicates. Shuffle a list of row indices with the same seeded generator
    # and reorder the array through fancy indexing instead.
    rowOrder = list(range(formatted.shape[0]))
    random.shuffle(rowOrder)
    formatted = formatted[rowOrder]

    dataFeatures = np.delete(formatted, -1, axis=1)
    dataPhenotypes = formatted[:, -1]
    return np.mean(cross_val_score(model, dataFeatures, dataPhenotypes, cv=cv))
def testContSpec(self):
    """Fit Specifics.csv with attributes 0, 2 and 3 specified under limit "c".

    After a zero-iteration fit, ``attributeInfoType`` must equal
    ``[True, False, True]`` and ``discretePhenotype`` must be truthy.
    """
    csvPath = os.path.join(THIS_DIR, "test/DataSets/Tests/SpecificityTests/Specifics.csv")
    _, _, features, phenotypes = StringEnumerator(csvPath, "class").get_params()

    specified = np.array([0, 2, 3])
    model = eLCS(
        learning_iterations=0,
        discrete_attribute_limit="c",
        specified_attributes=specified,
    )
    model.fit(features, phenotypes)

    self.assertEqual(model.env.formatData.attributeInfoType, [True, False, True])
    self.assertTrue(model.env.formatData.discretePhenotype)
def test11BitMultiplexer5000Iterations(self):
    """Train for 5000 iterations on Multiplexer11.csv and check final accuracy.

    The final accuracy is compared against an answer key of 1 through
    ``approxEqualOrBetter`` with a first argument of 0.2.
    """
    csvPath = os.path.join(THIS_DIR, "test/DataSets/Real/Multiplexer11.csv")
    _, _, features, phenotypes = StringEnumerator(csvPath, "class").get_params()

    model = eLCS(learning_iterations=5000)
    model.fit(features, phenotypes)

    expectedAccuracy = 1
    isCloseEnough = self.approxEqualOrBetter(
        0.2, model.get_final_accuracy(), expectedAccuracy, True
    )
    self.assertTrue(isCloseEnough)
def testContinuous1000Iterations(self):
    """Train for 1000 iterations on the mixed-attribute dataset and check accuracy.

    The final accuracy is compared against an answer key of 0.61 through
    ``approxEqualOrBetter`` with a first argument of 0.2.
    """
    csvPath = os.path.join(
        THIS_DIR, "test/DataSets/Real/ContinuousAndNonBinaryDiscreteAttributes.csv"
    )
    _, _, features, phenotypes = StringEnumerator(csvPath, "Class").get_params()

    model = eLCS(learning_iterations=1000)
    model.fit(features, phenotypes)

    expectedAccuracy = 0.61
    isCloseEnough = self.approxEqualOrBetter(
        0.2, model.get_final_accuracy(), expectedAccuracy, True
    )
    self.assertTrue(isCloseEnough)
def testFullFloatsMissing(self):
    """Fit two iterations on numericFloatsMissing.csv and check the iteration count.

    After fitting with ``learning_iterations=2`` the model's ``explorIter``
    attribute is expected to equal 2.
    """
    dataPath = os.path.join(THIS_DIR, "test/DataSets/Tests/NumericTests/numericFloatsMissing.csv")
    converter = StringEnumerator(dataPath, "class")
    headers, classLabel, dataFeatures, dataPhenotypes = converter.get_params()

    clf = eLCS(learning_iterations=2)
    clf.fit(dataFeatures, dataPhenotypes)

    # assertEqual, not assertTrue: assertTrue(x, 2) treats 2 as the failure
    # message and passes for any truthy x, so it never checked the count.
    self.assertEqual(clf.explorIter, 2)
def testMPCV(self):
    """Run 3-fold cross-validation on shuffled Multiplexer6.csv rows.

    The mean fold score is compared with 0.8452 through ``approxEqual``
    using a first argument of 0.3.
    """
    csvPath = os.path.join(THIS_DIR, "test/DataSets/Real/Multiplexer6.csv")
    _, _, features, phenotypes = StringEnumerator(csvPath, "class").get_params()
    model = eLCS(learning_iterations=2000)

    # Append the phenotype as the last column so rows are shuffled as a unit.
    lastColumn = features.shape[1]
    combined = np.insert(features, lastColumn, phenotypes, 1)
    np.random.shuffle(combined)
    features = np.delete(combined, -1, axis=1)
    phenotypes = combined[:, -1]

    foldScores = cross_val_score(model, features, phenotypes, cv=3)
    meanScore = np.mean(foldScores)
    self.assertTrue(self.approxEqual(0.3, meanScore, 0.8452))
def testContMissing(self):
    """Cross-validate on the shuffled mixed-attribute dataset with missing values.

    Uses ``cross_val_score`` with its default ``cv`` and compares the mean
    score with 0.6355 through ``approxEqual`` using a first argument of 0.2.
    """
    csvPath = os.path.join(
        THIS_DIR, "test/DataSets/Real/ContinuousAndNonBinaryDiscreteAttributesMissing.csv"
    )
    _, _, features, phenotypes = StringEnumerator(csvPath, "Class").get_params()
    model = eLCS(learning_iterations=5000)

    # Append the phenotype as the last column so rows are shuffled as a unit.
    lastColumn = features.shape[1]
    combined = np.insert(features, lastColumn, phenotypes, 1)
    np.random.shuffle(combined)
    features = np.delete(combined, -1, axis=1)
    phenotypes = combined[:, -1]

    foldScores = cross_val_score(model, features, phenotypes)
    meanScore = np.mean(foldScores)
    self.assertTrue(self.approxEqual(0.2, meanScore, 0.6355))
def testNANPhenotypeExists(self):
    """Check that fitting on data containing "NA" strings is rejected.

    Loads numericFloatsNAN.csv, replaces every missing cell with the string
    "NA", and expects ``fit`` to raise with the fully-numeric message.
    """
    dataPath = os.path.join(THIS_DIR, "test/DataSets/Tests/NumericTests/numericFloatsNAN.csv")
    data = pd.read_csv(dataPath, sep=',')
    # Any missing cell becomes the string "NA" before the arrays are extracted.
    data = data.fillna("NA")
    dataFeatures = data.drop("class", axis=1).values
    dataPhenotypes = data["class"].values

    clf = eLCS(learning_iterations=2)
    with self.assertRaises(Exception) as context:
        clf.fit(dataFeatures, dataPhenotypes)
    # assertIn shows both operands on failure, unlike assertTrue(a in b).
    self.assertIn("X and y must be fully numeric", str(context.exception))
def testParamThetaSub2(self):
    """Check that ``theta_sub=5`` is stored on the estimator unchanged."""
    expected = 5
    model = eLCS(theta_sub=expected)
    self.assertEqual(model.theta_sub, expected)
def testParamN(self):
    """Check that ``N=2000`` is stored on the estimator unchanged."""
    expected = 2000
    model = eLCS(N=expected)
    self.assertEqual(model.N, expected)
def testDoSub(self):
    """Check that ``do_correct_set_subsumption=True`` is stored on the estimator."""
    model = eLCS(do_correct_set_subsumption=True)
    storedFlag = model.do_correct_set_subsumption
    self.assertEqual(storedFlag, True)
def testParamNInvalidNumeric2(self):
    """Check that a negative ``N`` is rejected by the constructor.

    ``eLCS(N=-2)`` must raise, and the exception text must contain the
    nonnegative-integer message for ``N``.
    """
    with self.assertRaises(Exception) as context:
        eLCS(N=-2)
    # assertIn shows both operands on failure, unlike assertTrue(a in b).
    self.assertIn("N param must be nonnegative integer", str(context.exception))
def testDoSub2Invalid(self):
    """Check that a non-boolean ``do_GA_subsumption`` is rejected.

    ``eLCS(do_GA_subsumption=2)`` must raise, and the exception text must
    contain the boolean-parameter message.
    """
    with self.assertRaises(Exception) as context:
        eLCS(do_GA_subsumption=2)
    # assertIn shows both operands on failure, unlike assertTrue(a in b).
    self.assertIn("do_GA_subsumption param must be boolean", str(context.exception))
def testDoSub2(self):
    """Check that ``do_GA_subsumption=True`` is stored on the estimator."""
    model = eLCS(do_GA_subsumption=True)
    storedFlag = model.do_GA_subsumption
    self.assertEqual(storedFlag, True)
def testDiscreteAttributeLimitInv2(self):
    """Check that a negative ``discrete_attribute_limit`` is rejected.

    ``eLCS(discrete_attribute_limit=-10)`` must raise, and the exception text
    must contain the message listing the accepted values.
    """
    with self.assertRaises(Exception) as context:
        eLCS(discrete_attribute_limit=-10)
    # assertIn shows both operands on failure, unlike assertTrue(a in b).
    self.assertIn(
        "discrete_attribute_limit param must be nonnegative integer or 'c' or 'd'",
        str(context.exception),
    )
def testParamP_Spec2(self):
    """Check that ``p_spec=0.3`` is stored on the estimator unchanged."""
    expected = 0.3
    model = eLCS(p_spec=expected)
    self.assertEqual(model.p_spec, expected)
def testRandomSeedInv2(self):
    """Check that a float ``random_state`` is rejected by the constructor.

    ``eLCS(random_state=1.2)`` must raise, and the exception text must
    contain the integer-or-None message.
    """
    with self.assertRaises(Exception) as context:
        eLCS(random_state=1.2)
    # assertIn shows both operands on failure, unlike assertTrue(a in b).
    self.assertIn("random_state param must be integer or None", str(context.exception))
def testSelection2(self):
    """Check that ``selection_method="roulette"`` is stored on the estimator."""
    expected = "roulette"
    model = eLCS(selection_method=expected)
    self.assertEqual(model.selection_method, expected)
def testSelection1(self):
    """Check that ``selection_method="tournament"`` is stored on the estimator."""
    expected = "tournament"
    model = eLCS(selection_method=expected)
    self.assertEqual(model.selection_method, expected)
def testSelectionInvalid(self):
    """Check that an unknown ``selection_method`` is rejected.

    ``eLCS(selection_method="hello")`` must raise, and the exception text
    must contain the message naming the two accepted methods.
    """
    with self.assertRaises(Exception) as context:
        eLCS(selection_method="hello")
    # assertIn shows both operands on failure, unlike assertTrue(a in b).
    self.assertIn(
        "selection_method param must be 'tournament' or 'roulette'",
        str(context.exception),
    )
def testParamThetaSub1(self):
    """Check that ``theta_sub=0`` is stored on the estimator unchanged."""
    expected = 0
    model = eLCS(theta_sub=expected)
    self.assertEqual(model.theta_sub, expected)
def testParamThetaSubInv4(self):
    """Check that a negative ``theta_sub`` is rejected by the constructor.

    ``eLCS(theta_sub=-5)`` must raise, and the exception text must contain
    the nonnegative-integer message for ``theta_sub``.
    """
    with self.assertRaises(Exception) as context:
        eLCS(theta_sub=-5)
    # assertIn shows both operands on failure, unlike assertTrue(a in b).
    self.assertIn("theta_sub param must be nonnegative integer", str(context.exception))
def testRandomSeed2(self):
    """Check that ``random_state=200`` is stored on the estimator unchanged."""
    expected = 200
    model = eLCS(random_state=expected)
    self.assertEqual(model.random_state, expected)
def testParamNNonnumeric(self):
    """Check that a string ``N`` is rejected by the constructor.

    ``eLCS(N="hello")`` must raise, and the exception text must contain the
    nonnegative-integer message for ``N``.
    """
    with self.assertRaises(Exception) as context:
        eLCS(N="hello")
    # assertIn shows both operands on failure, unlike assertTrue(a in b).
    self.assertIn("N param must be nonnegative integer", str(context.exception))
def testParamP_SpecInv3(self):
    """Check that a negative ``p_spec`` is rejected by the constructor.

    ``eLCS(p_spec=-1.2)`` must raise, and the exception text must contain
    the 0 - 1 float message for ``p_spec``.
    """
    with self.assertRaises(Exception) as context:
        eLCS(p_spec=-1.2)
    # assertIn shows both operands on failure, unlike assertTrue(a in b).
    self.assertIn("p_spec param must be float from 0 - 1", str(context.exception))
def testRandomSeed3(self):
    """Check that ``random_state=None`` is stored on the estimator unchanged."""
    model = eLCS(random_state=None)
    storedSeed = model.random_state
    self.assertEqual(storedSeed, None)
def testParamP_Spec3(self):
    """Check that ``p_spec=1`` is stored on the estimator unchanged."""
    expected = 1
    model = eLCS(p_spec=expected)
    self.assertEqual(model.p_spec, expected)
def testRebootFilenameInv2(self):
    """Check that a boolean ``reboot_filename`` is rejected by the constructor.

    ``eLCS(reboot_filename=True)`` must raise, and the exception text must
    contain the None-or-String message for ``reboot_filename``.
    """
    with self.assertRaises(Exception) as context:
        eLCS(reboot_filename=True)
    # assertIn shows both operands on failure, unlike assertTrue(a in b).
    self.assertIn(
        "reboot_filename param must be None or String from pickle",
        str(context.exception),
    )
def testRebootFilename2(self):
    """Check that ``reboot_filename=None`` is stored on the estimator unchanged."""
    model = eLCS(reboot_filename=None)
    storedName = model.reboot_filename
    self.assertEqual(storedName, None)
def testParamThetaDel2(self):
    """Check that ``theta_del=5`` is stored on the estimator unchanged."""
    expected = 5
    model = eLCS(theta_del=expected)
    self.assertEqual(model.theta_del, expected)