def runExperiment(self, classifier: Classifier, parameter: Parameter,
                  experimentPerformance: ExperimentPerformance,
                  crossValidation: CrossValidation, testSet: InstanceList):
    # Train on each of the K folds and evaluate every trained model
    # on the same separate test set.
    for i in range(self.K):
        trainSet = InstanceList(crossValidation.getTrainFold(i))
        classifier.train(trainSet, parameter)
        experimentPerformance.add(classifier.test(testSet))
def compare(self, classifier1: ExperimentPerformance,
            classifier2: ExperimentPerformance) -> StatisticalTestResult:
    statistic = self.__testStatistic(classifier1, classifier2)
    # Combined 5x2cv F test: the statistic is compared against an
    # F distribution with (10, 5) degrees of freedom.
    degreeOfFreedom1 = classifier1.numberOfExperiments()
    degreeOfFreedom2 = classifier2.numberOfExperiments() // 2
    return StatisticalTestResult(
        Distribution.fDistribution(statistic, degreeOfFreedom1,
                                   degreeOfFreedom2), True)
def execute(self, experiment: Experiment) -> ExperimentPerformance:
    """
    Execute K-fold cross-validation with a separate test set, using the given
    classifier on the given data set with the given parameters.

    PARAMETERS
    ----------
    experiment : Experiment
        Experiment to be run.

    RETURNS
    -------
    ExperimentPerformance
        An ExperimentPerformance instance.
    """
    result = ExperimentPerformance()
    instanceList = experiment.getDataSet().getInstanceList()
    # Hold out 25% of the data as the separate test set (partition.get(0));
    # the remaining 75% (partition.get(1)) is used for K-fold cross-validation.
    partition = Partition(instanceList, 0.25,
                          experiment.getParameter().getSeed(), True)
    crossValidation = KFoldCrossValidation(
        partition.get(1).getInstances(), self.K,
        experiment.getParameter().getSeed())
    self.runExperiment(experiment.getClassifier(),
                       experiment.getParameter(), result, crossValidation,
                       partition.get(0))
    return result
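For orientation, a minimal standalone sketch of the data flow in the execute method above, assuming plain integer "instances": hold out 25% as the separate test set, then build K training folds from the remaining 75%.

import random

random.seed(1)
instances = list(range(20))
random.shuffle(instances)
cut = int(0.25 * len(instances))
testSet, trainPool = instances[:cut], instances[cut:]
K = 5
foldSize = len(trainPool) // K
for i in range(K):
    # Each training fold is the pool minus one held-out slice.
    trainFold = trainPool[:i * foldSize] + trainPool[(i + 1) * foldSize:]
    print(len(trainFold), "training instances, tested on", len(testSet), "held-out instances")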
def __testStatistic(self, classifier1: ExperimentPerformance,
                    classifier2: ExperimentPerformance):
    if classifier1.numberOfExperiments() != classifier2.numberOfExperiments():
        raise StatisticalTestNotApplicable(
            "In order to apply a paired test, you need to have the same "
            "number of experiments in both algorithms.")
    if classifier1.numberOfExperiments() != 10:
        raise StatisticalTestNotApplicable(
            "In order to apply a 5x2 test, you need to have 10 experiments.")
    # Numerator: sum of squared error-rate differences over all 10 runs.
    numerator = 0
    difference = []
    for i in range(classifier1.numberOfExperiments()):
        difference.append(
            classifier1.getErrorRate(i) - classifier2.getErrorRate(i))
        numerator += difference[i] * difference[i]
    # Denominator: twice the sum of the per-replication variances, one
    # variance for each of the 5 replications (2 folds each).
    denominator = 0
    for i in range(classifier1.numberOfExperiments() // 2):
        mean = (difference[2 * i] + difference[2 * i + 1]) / 2
        variance = (difference[2 * i] - mean) ** 2 \
                   + (difference[2 * i + 1] - mean) ** 2
        denominator += variance
    denominator *= 2
    if denominator == 0:
        raise StatisticalTestNotApplicable("Variance is 0.")
    return numerator / denominator
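For reference, a minimal standalone sketch of the same 5x2cv F statistic, assuming two hypothetical lists of ten error rates (five replications of two folds each):

errorRates1 = [0.12, 0.10, 0.11, 0.13, 0.09, 0.12, 0.10, 0.11, 0.12, 0.10]
errorRates2 = [0.14, 0.12, 0.13, 0.15, 0.11, 0.13, 0.12, 0.14, 0.13, 0.12]
difference = [e1 - e2 for e1, e2 in zip(errorRates1, errorRates2)]
numerator = sum(d * d for d in difference)   # sum of squared differences
denominator = 0
for i in range(5):                           # one variance per replication
    mean = (difference[2 * i] + difference[2 * i + 1]) / 2
    denominator += (difference[2 * i] - mean) ** 2 + (difference[2 * i + 1] - mean) ** 2
fStatistic = numerator / (2 * denominator)
print(fStatistic)                            # compared against F(10, 5)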
Example #5
def __testStatistic(self, classifier1: ExperimentPerformance,
                    classifier2: ExperimentPerformance):
    if classifier1.numberOfExperiments() != classifier2.numberOfExperiments():
        raise StatisticalTestNotApplicable(
            "In order to apply a paired test, you need to have the same "
            "number of experiments in both algorithms.")
    # Paired t statistic: the mean of the error-rate differences divided by
    # their standard error, i.e. sqrt(n) * mean / standardDeviation.
    difference = []
    total = 0
    for i in range(classifier1.numberOfExperiments()):
        difference.append(classifier1.getErrorRate(i) - classifier2.getErrorRate(i))
        total += difference[i]
    mean = total / classifier1.numberOfExperiments()
    total = 0
    for i in range(classifier1.numberOfExperiments()):
        total += (difference[i] - mean) * (difference[i] - mean)
    standardDeviation = math.sqrt(total / (classifier1.numberOfExperiments() - 1))
    if standardDeviation == 0:
        raise StatisticalTestNotApplicable("Variance is 0.")
    return math.sqrt(classifier1.numberOfExperiments()) * mean / standardDeviation
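A minimal standalone sketch of the same paired t statistic, assuming two hypothetical lists of per-fold error rates:

import math

errorRates1 = [0.12, 0.10, 0.11, 0.13, 0.09]
errorRates2 = [0.14, 0.11, 0.13, 0.16, 0.10]
difference = [e1 - e2 for e1, e2 in zip(errorRates1, errorRates2)]
n = len(difference)
mean = sum(difference) / n
standardDeviation = math.sqrt(sum((d - mean) ** 2 for d in difference) / (n - 1))
tStatistic = math.sqrt(n) * mean / standardDeviation
print(tStatistic)   # about -4.81, compared against t with n - 1 degrees of freedom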
Example #6
def execute(self, experiment: Experiment) -> ExperimentPerformance:
    result = ExperimentPerformance()
    # Repeat the stratified K-fold cross-validation M times, accumulating
    # all M x K results in a single ExperimentPerformance.
    for j in range(self.M):
        instanceList = experiment.getDataSet().getInstanceList()
        partition = Partition(instanceList, 0.25,
                              experiment.getParameter().getSeed(), True)
        crossValidation = StratifiedKFoldCrossValidation(
            Partition(partition.get(1)).getLists(), self.K,
            experiment.getParameter().getSeed())
        self.runExperiment(experiment.getClassifier(),
                           experiment.getParameter(), result,
                           crossValidation, partition.get(0))
    return result
def execute(self, experiment: Experiment) -> ExperimentPerformance:
    """
    Execute the bootstrap run with the given classifier on the given data set
    using the given parameters.

    PARAMETERS
    ----------
    experiment : Experiment
        Experiment to be run.

    RETURNS
    -------
    ExperimentPerformance
        An ExperimentPerformance instance.
    """
    result = ExperimentPerformance()
    for i in range(self.__numberOfBootstraps):
        # Draw a bootstrap sample (sampling with replacement), train on it,
        # and evaluate on the full data set.
        bootstrap = Bootstrap(experiment.getDataSet().getInstances(),
                              i + experiment.getParameter().getSeed())
        bootstrapSample = InstanceList(bootstrap.getSample())
        experiment.getClassifier().train(bootstrapSample,
                                         experiment.getParameter())
        result.add(experiment.getClassifier().test(
            experiment.getDataSet().getInstanceList()))
    return result
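A minimal sketch of the bootstrap sampling used above, assuming a plain list of instances: each sample draws len(data) items with replacement, so roughly a third of the instances are left out of any given sample.

import random

random.seed(1)
data = list(range(10))
for i in range(3):
    sample = random.choices(data, k=len(data))   # one bootstrap sample
    print(sorted(sample))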
def execute(self, experiment: Experiment) -> ExperimentPerformance:
    """
    Execute stratified K-fold cross-validation with the given classifier on
    the given data set using the given parameters.

    PARAMETERS
    ----------
    experiment : Experiment
        Experiment to be run.

    RETURNS
    -------
    ExperimentPerformance
        An ExperimentPerformance instance.
    """
    result = ExperimentPerformance()
    # No separate test set here: each fold is evaluated on its own
    # held-out portion inside runExperiment.
    crossValidation = StratifiedKFoldCrossValidation(
        experiment.getDataSet().getClassInstances(), self.K,
        experiment.getParameter().getSeed())
    self.runExperiment(experiment.getClassifier(),
                       experiment.getParameter(), result, crossValidation)
    return result
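A minimal sketch of the stratification used above, assuming instances labelled by class: each class is split across the K folds separately, so every fold keeps approximately the original class proportions.

labelled = [("A", i) for i in range(8)] + [("B", i) for i in range(4)]
K = 2
folds = [[] for _ in range(K)]
for label in ["A", "B"]:
    group = [item for item in labelled if item[0] == label]
    for j, item in enumerate(group):
        folds[j % K].append(item)   # round-robin within each class
for fold in folds:
    print(fold)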
Example #9
def compare(self, classifier1: ExperimentPerformance,
            classifier2: ExperimentPerformance) -> StatisticalTestResult:
    if classifier1.numberOfExperiments() != classifier2.numberOfExperiments():
        raise StatisticalTestNotApplicable(
            "In order to apply a paired test, you need to have the same "
            "number of experiments in both algorithms.")
    # Sign test: count the experiments each classifier wins (ties are ignored).
    plus = 0
    minus = 0
    for i in range(classifier1.numberOfExperiments()):
        if classifier1.getErrorRate(i) < classifier2.getErrorRate(i):
            plus += 1
        elif classifier1.getErrorRate(i) > classifier2.getErrorRate(i):
            minus += 1
    total = plus + minus
    if total == 0:
        raise StatisticalTestNotApplicable("Variance is 0.")
    # One-tailed binomial probability of observing at most `plus` wins out of
    # `total` non-tied experiments under p = 0.5.
    pValue = 0
    for i in range(plus + 1):
        pValue += self.__binomial(total, i) / math.pow(2, total)
    return StatisticalTestResult(pValue, False)
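A minimal standalone sketch of the same sign-test p-value, assuming hypothetical win counts: P(X <= plus) for X ~ Binomial(total, 0.5).

import math

plus, minus = 2, 8
total = plus + minus
pValue = sum(math.comb(total, i) for i in range(plus + 1)) / 2 ** total
print(pValue)   # about 0.055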
Example #10
def compare(self, classifier1: ExperimentPerformance,
            classifier2: ExperimentPerformance) -> StatisticalTestResult:
    statistic = self.__testStatistic(classifier1, classifier2)
    # Paired t test with n - 1 degrees of freedom.
    degreeOfFreedom = classifier1.numberOfExperiments() - 1
    return StatisticalTestResult(
        Distribution.tDistribution(statistic, degreeOfFreedom), False)
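A minimal sketch of turning the t statistic into a p-value with scipy.stats instead of the library's Distribution class; scipy here is an assumption for illustration, not a dependency of the code above.

from scipy.stats import t

statistic = -4.81        # e.g. the value from the paired t sketch earlier
degreeOfFreedom = 4
pValue = 2 * t.sf(abs(statistic), degreeOfFreedom)   # two-tailed
print(pValue)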