Ejemplo n.º 1
0
class ClassifierExecutor(ABC):
    """Abstract base for objects that load a data set and score feature subsets.

    Two references to the data are kept: ``originalInstances`` (the data as
    loaded and filtered) and ``instances`` (the working reference, which is
    reset from ``originalInstances``).
    """

    def __init__(self):
        self.originalInstances = None
        self.instances = None

    def loadFeatures(self, filename=None, filter=None):
        """Load an ARFF file, or reset the working data when called bare.

        Python keeps only the last ``def`` of a given name, so the three
        former "overloads" are folded into this single method:

        * ``loadFeatures()`` resets ``instances`` to ``originalInstances``
          and returns ``None``.
        * ``loadFeatures(filename)`` loads the file without filtering.
        * ``loadFeatures(filename, filter)`` additionally applies every
          filter of the ``filter`` sequence, in order.

        Args:
            filename: Path of the ARFF file, or ``None`` to only reset the
                working data.
            filter: Optional sequence of filter objects. Each one must offer
                ``setInputFormat`` and be usable with Weka's
                ``Filter.useFilter``.

        Returns:
            The attribute count minus one after a load, ``None`` after a
            reset.
        """
        if filename is None:
            self.instances = self.originalInstances
            return None

        loader = Loader("weka.core.converters.ArffLoader")
        self.originalInstances = loader.load_file(filename)

        # Iterate the sequence directly; Python sequences have no ``.length``.
        for current in filter or ():
            current.setInputFormat(self.originalInstances)
            # NOTE(review): the result of useFilter is wrapped in ``Instance``
            # exactly as before -- confirm this is the intended wrapper type.
            self.originalInstances = Instance(
                javabridge.static_call(
                    "Lweka/filters/Filter;", "useFilter",
                    "(Lweka/core/Instances;Lweka/filters/Filter;)Lweka/core/Instances;",
                    self.originalInstances, current))

        self.instances = self.originalInstances
        return self.originalInstances.num_attributes() - 1

    def getFeaturesSize(self):
        """Return the attribute count minus one, or -1 if nothing is loaded."""
        if self.originalInstances is None:
            return -1
        return self.originalInstances.num_attributes() - 1

    @abstractmethod
    def execute(self, featureInclusion, kFold, classIndex=None):
        """Evaluate a feature subset; must be implemented by subclasses.

        The two former abstract "overloads" are merged: ``classIndex`` is
        optional so both two- and three-argument calls are valid. This is an
        instance method (it takes ``self``), so it is no longer wrapped in
        ``@classmethod``.

        Args:
            featureInclusion: Per-feature inclusion flags.
            kFold: Number of cross-validation folds.
            classIndex: Optional index of the class attribute.
        """
Ejemplo n.º 2
0
class CHA():
    def __init__(self):
        """Set the default configuration and an empty search state."""
        # Inclusion flags handed to the executor for the "full features" run.
        # This must be a list: a ``{True, True, True, True}`` set literal
        # collapses to ``{True}`` and cannot be indexed.
        self.features = [True, True, True, True]
        self.featureSize = 0
        self.databaseName = "dataset/segment.arff"
        self.runtime = 20   # number of employed/onlooker/scout cycles
        self.limit = 6      # limit count at which a food source is abandoned
        self.mr = 0.1       # per-feature perturbation probability
        self.KFOLD = 10

        self.bestFitness = 0
        self.bestFoodSource = None
        self.foodSources = set()
        self.visitedFoodSources = set()
        self.scouts = set()
        self.abandoned = set()
        self.markedToRemoved = set()
        self.neighbors = set()
        if self.mr > 0:
            self.perturbation = PerturbationStrategy.USE_MR
        else:
            self.perturbation = PerturbationStrategy.CHANGE_ONE_FEATURE

        self.states = 0
        self.data = None

        # Attributes read by other methods of this class. They are declared
        # here so a missing collaborator shows up as ``None`` rather than as
        # an AttributeError on first use.
        self.executor = None
        self.replaceMissingValues = None
        self.originalInstances = None
        self.instances = None

    def loadFeatures(self, filename=None, filter=None):
        """Load the data set, either as a NumPy array or through Weka.

        Python keeps only the last ``def`` of a name, so the two former
        "overloads" are merged into this one method.

        * ``loadFeatures()`` reads ``self.databaseName`` with the ``arff``
          module into ``self.data``, sets ``self.featureSize`` to the column
          count minus one and returns the number of rows.
        * ``loadFeatures(filename, filter)`` loads ``filename`` through the
          Weka loader, applies each filter of the ``filter`` sequence in
          order, stores the result in ``originalInstances``/``instances`` and
          returns the attribute count minus one.

        Note that the two paths return different quantities (row count
        versus feature count); this is preserved from the original code.

        Args:
            filename: ARFF path for the Weka path, or ``None`` to read
                ``self.databaseName`` into a NumPy array.
            filter: Optional sequence of filter objects for the Weka path.

        Returns:
            See the two cases above.
        """
        if filename is not None:
            loader = Loader("weka.core.converters.ArffLoader")
            self.originalInstances = loader.load_file(filename)
            # Iterate directly; Python sequences have no ``.length``.
            for current in filter or ():
                current.setInputFormat(self.originalInstances)
                self.originalInstances = Instance(
                    javabridge.static_call(
                        "Lweka/filters/Filter;", "useFilter",
                        "(Lweka/core/Instances;Lweka/filters/Filter;)Lweka/core/Instances;",
                        self.originalInstances, current))
            self.instances = self.originalInstances
            return self.originalInstances.num_attributes() - 1

        # Close the file deterministically instead of leaking the handle.
        with open(self.databaseName, 'r') as handle:
            ds = arff.load(handle)
        self.data = np.array(ds['data'])
        # The last column is treated as the class label by calculateFitness.
        self.featureSize = self.data.shape[1] - 1
        return self.data.shape[0]

    def executeKFoldClassifier(self, featureInclusion, kFold):
        deleteFeatures = 0
        for i in range(0, len(featureInclusion)):
            if featureInclusion[i]:
                self.instances.deleteAttributeAt(i - deleteFeatures)
                deleteFeatures += 1
        self.instances.setClassIndex(self.instances.numAttributes - 1)

        cvParameterSelection = javabridge.make_instance(
            "weka/classifiers/meta/CVParameterSelection", "()V")
        javabridge.call(cvParameterSelection, "setNumFolds", "(I)V", kFold)
        javabridge.call(cvParameterSelection,
                        "buildClassifier(weka/core/Instances)V",
                        self.instances)

        eval = Evaluation(self.instances)
        eval.crossvalidate_model(cvParameterSelection, self.instances, kFold,
                                 random())

        return eval.percent_correct()

    def executeKFoldClassifier(self, featureInclusion, kFold, classIndex):
        deletedFeatures = 0
        for i in range(0, len(featureInclusion)):
            if featureInclusion[i] == False:
                self.instances.deleteAttributeAt(i - deletedFeatures)
                deletedFeatures += 1
        '''
        self.instances.setClassIndex(classIndex)

        cvParameterSelection = javabridge.make_instance("Lweka/classifiers/meta/CVParameterSelection","()V")
        javabridge.call(cvParameterSelection, "setNumFolds", "(I)V", kFold)
        javabridge.call(cvParameterSelection,"buildClassifier(Lweka/core/Instances)V",self.instances)

        eval = Evaluation(self.instances)
        eval.crossvalidate_model(cvParameterSelection, self.instances, kFold, Random(1))

        return eval.percent_correct()'''

    def executeFullFeaturesWithNoFilters(self):
        print('executeFullFeaturesWithNoFilters')
        self.executor.loadFeatures(self.databaseName,
                                   self.replaceMissingValues)
        result = self.executor.execute(self.features, self.KFOLD)
        print('Full ' + result + '%')

    def executeWithNoFilter(self):
        print('executeWithNoFilter')
        self.executor.loadFeatures(self.databaseName,
                                   self.replaceMissingValues)
        # self.featureSelection = FeatureSelection(self.runtime,
        #                                          self.limit, self.mr, self.executor)
        # self.featureSelection.setExecutor(self.executor)
        # self.featureSelection.execute()
        self.executeFeatureSelection()

    def initializeFoodSource(self):
        print('initializeFoodSources')
        for i in range(0, self.featureSize):
            self.states += 1
            features = np.zeros(self.featureSize)
            features[i] = True
            curFitness = self.calculateFitness(features)
            fs = FoodSource(features, curFitness, 1)
            self.foodSources.add(fs)
            if (curFitness > self.bestFitness):
                self.bestFoodSource = fs
                self.bestFitness = curFitness

    def sendEmployedBees(self):
        print('sendEmployedBees')
        self.scouts = set()
        self.markedToRemoved = set()
        self.neighbors = set()

        for fs in self.foodSources:
            self.sendBee(fs)

        # remove all markedToRemoved
        for mtr in self.markedToRemoved:
            if mtr in self.foodSources:
                self.foodSources.remove(mtr)

        for n in self.neighbors:
            self.foodSources.add(n)

    def sendOnlookerBees(self):
        print('SendOnlookerBees')
        self.markedToRemoved = set()
        self.neighbors = set()

        min = 0
        range = 0
        for s in self.foodSources:
            if s.getFitness() < min:
                min = s.getFitness()
            if s.getFitness() > range:
                range = s.getFitness()

        for fs in self.foodSources:
            prob = (fs.getFitness() - min) / range
            if random.random() < prob:
                self.sendBee(fs)
            else:
                fs.incrementLimit()

        for mtr in self.markedToRemoved:
            if mtr in self.foodSources:
                self.foodSources.remove(mtr)

        for n in self.neighbors:
            self.foodSources.add(n)

    def sendBee(self, foodSource):
        """Explore a neighbour of ``foodSource`` and record the outcome.

        A neighbour is produced by switching on additional features, either
        one random feature (``CHANGE_ONE_FEATURE``) or each feature with
        probability ``self.mr`` (``USE_MR``). Perturbation is repeated until
        the candidate is not in any of the known collections, or until more
        than ``self.featureSize`` attempts were made.

        A candidate that is new is scored with ``calculateFitness``:

        * If it is worse than ``foodSource`` (lower fitness, or equal fitness
          with more features), the source's limit counter is incremented, the
          source is abandoned and a scout is created once the counter reaches
          ``self.limit``, and the candidate is remembered as visited.
        * Otherwise the candidate is added to ``self.neighbors`` and replaces
          the best solution if it beats it (higher fitness, or equal fitness
          with fewer features).

        Args:
            foodSource: The food source whose neighbourhood is explored.

        Returns:
            Always ``True``.
        """
        # Perturb a copy: changing the vector in place would silently alter
        # ``foodSource`` (and any set it is stored in) as well.
        features = foodSource.getFeatureInclusion().copy()
        nrFeatures = foodSource.getNrFeatures()

        def isUnexplored(candidate):
            # True only when the candidate is in none of the collections.
            return (candidate not in self.foodSources
                    and candidate not in self.neighbors
                    and candidate not in self.abandoned
                    and candidate not in self.visitedFoodSources)

        times = 0
        while True:
            times += 1
            if self.perturbation == PerturbationStrategy.CHANGE_ONE_FEATURE:
                # randrange picks every index with equal probability.
                index = random.randrange(self.featureSize)
                # Truthiness test: ``is False`` never matches NumPy scalars.
                if not features[index]:
                    nrFeatures += 1
                    features[index] = True
            elif self.perturbation == PerturbationStrategy.USE_MR:
                for i in range(self.featureSize):
                    if random.random() < self.mr and not features[i]:
                        nrFeatures += 1
                        features[i] = True

            modifedFoodSource = FoodSource(features)
            if isUnexplored(modifedFoodSource) or times > self.featureSize:
                break

        # Only score candidates that are new everywhere. The attempt cap may
        # end the loop on an already-known candidate, which is skipped.
        if not isUnexplored(modifedFoodSource):
            return True

        self.states += 1
        fitness = self.calculateFitness(features)
        modifedFoodSource.setFitness(fitness)
        modifedFoodSource.setNrFeatures(nrFeatures)

        sourceFitness = foodSource.getFitness()
        isWorse = sourceFitness > fitness or (
            fitness == sourceFitness
            and nrFeatures > foodSource.getNrFeatures())
        if isWorse:
            foodSource.incrementLimit()
            if foodSource.getLimit() >= self.limit:
                self.markAbandonsFoodSource(foodSource)
                self.createScoutBee()
            self.visitedFoodSources.add(modifedFoodSource)
            return True

        # ``bestFoodSource`` can still be None while bestFitness is 0.
        beatsBest = fitness > self.bestFitness or (
            fitness == self.bestFitness
            and (self.bestFoodSource is None
                 or nrFeatures < self.bestFoodSource.getNrFeatures()))
        if beatsBest:
            self.bestFoodSource = FoodSource(
                modifedFoodSource.getFeatureInclusion(),
                modifedFoodSource.getFitness(),
                modifedFoodSource.getNrFeatures())
            self.bestFitness = fitness
        self.neighbors.add(modifedFoodSource)
        return True

    def createScoutBee(self):
        """Create a random food source and queue it as a scout if it is new.

        Every feature is included or excluded by one random bit. The resulting
        vector is scored with ``calculateFitness``; the food source is added
        to ``self.scouts`` (and ``self.states`` incremented) only when it is
        in none of ``foodSources``, ``neighbors``, ``abandoned`` and
        ``visitedFoodSources``.
        """
        picks = [bool(random.getrandbits(1)) for _ in range(self.featureSize)]
        features = np.array(picks, dtype=float)
        nrFeatures = sum(picks)

        curFitness = self.calculateFitness(features)
        candidate = FoodSource(features, curFitness, nrFeatures)

        known = (self.foodSources, self.neighbors,
                 self.abandoned, self.visitedFoodSources)
        for collection in known:
            if candidate in collection:
                return
        self.states += 1
        self.scouts.add(candidate)

    def sendScoutBeesAndRemoveAbandonsFoodSource(self):
        #remove abandoned
        for abd in self.abandoned:
            if abd in self.foodSources:
                self.foodSources.remove(abd)

        for s in self.scouts:
            self.foodSources.add(s)

    def markAbandonsFoodSource(self, foodSource):
        self.abandoned.add(foodSource)

    def calculateFitness(self, featureInclusion):
        deletedFeatures = 0

        data = self.data
        for i in range(0, len(featureInclusion)):
            if featureInclusion[i] == False:
                data = np.delete(data, np.s_[i - deletedFeatures], 1)
                deletedFeatures += 1

        rows, cols = data.shape
        X = data[:, :cols - 1]
        y = data[:, cols - 1:]
        y = y.ravel()
        kf = KFold(n_splits=10)
        for train_index, test_index in kf.split(X):
            X_train, X_test = X[train_index], X[test_index]
            y_train, y_test = y[train_index], y[test_index]
        n = KNeighborsClassifier(n_neighbors=3)
        n.fit(X_train, y_train)

        score = n.score(X_test, y_test)
        return score

    def executeFeatureSelection(self):
        self.visitedFoodSources = set()
        self.states = 0
        time = datetime.now()
        self.initializeFoodSource()
        print('init time: ', datetime.now() - time)
        for i in range(0, self.runtime):
            self.sendEmployedBees()
            self.sendOnlookerBees()
            self.sendScoutBeesAndRemoveAbandonsFoodSource()

        time = (datetime.now() - time) / 60000
        self.logBestSolutionAndTime(time)
        self.states = 0

    def logBestSolutionAndTime(self, t):
        print('Time: ', t)
        print('Best ', self.bestFoodSource.getFeatureInclusion())
        print('Feature selection End.')

    def runCHA(self):
        self.loadFeatures()
        self.executeFeatureSelection()