def runExperiment(self,
                  classifier: Classifier,
                  parameter: Parameter,
                  experimentPerformance: ExperimentPerformance,
                  crossValidation: CrossValidation,
                  testSet: InstanceList):
    # Train on each of the K training folds in turn; the separate test set stays
    # fixed, so every fold's model is evaluated on the same instances.
    for i in range(self.K):
        trainSet = InstanceList(crossValidation.getTrainFold(i))
        classifier.train(trainSet, parameter)
        experimentPerformance.add(classifier.test(testSet))
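# Illustrative sketch, not part of the library: the same idea as runExperiment above
# in plain Python, with a toy majority-label "classifier" standing in for the
# Classifier interface. The folds, labels, and test set below are made up.
from collections import Counter

def toy_run_experiment(train_folds, test_set):
    """Fit a majority-label model on each training fold and score it on the fixed test set."""
    error_rates = []
    for fold in train_folds:
        majority_label = Counter(fold).most_common(1)[0][0]        # "training"
        errors = sum(1 for label in test_set if label != majority_label)
        error_rates.append(errors / len(test_set))                 # "testing" on the same set each time
    return error_rates

folds = [["a", "a", "b"], ["a", "b", "b"], ["b", "b", "b"]]        # K = 3 training folds
test = ["a", "a", "a", "b"]                                        # fixed separate test set
print(toy_run_experiment(folds, test))                             # [0.25, 0.75, 0.75]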
def compare(self, classifier1: ExperimentPerformance, classifier2: ExperimentPerformance) -> StatisticalTestResult:
    # 5x2 cv F test: the statistic is checked against an F distribution with
    # (numberOfExperiments, numberOfExperiments // 2) degrees of freedom.
    statistic = self.__testStatistic(classifier1, classifier2)
    degreeOfFreedom1 = classifier1.numberOfExperiments()
    degreeOfFreedom2 = classifier2.numberOfExperiments() // 2
    return StatisticalTestResult(Distribution.fDistribution(statistic, degreeOfFreedom1, degreeOfFreedom2), True)
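# Illustrative sketch, not part of the library: with 10 experiments the degrees of
# freedom above are (10, 5). Assuming Distribution.fDistribution returns an upper-tail
# probability, SciPy (an extra dependency used only here) gives an equivalent check;
# the statistic value is made up.
from scipy.stats import f

statistic = 2.3                                # hypothetical 5x2 cv F statistic
print(round(f.sf(statistic, 10, 5), 4))        # upper-tail probability with dof (10, 5)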
def execute(self, experiment: Experiment) -> ExperimentPerformance:
    """
    Execute K-fold cross-validation with a separate test set, with the given classifier on the given data set
    using the given parameters.

    PARAMETERS
    ----------
    experiment : Experiment
        Experiment to be run.

    RETURNS
    -------
    ExperimentPerformance
        An ExperimentPerformance instance.
    """
    result = ExperimentPerformance()
    instanceList = experiment.getDataSet().getInstanceList()
    partition = Partition(instanceList, 0.25, experiment.getParameter().getSeed(), True)
    crossValidation = KFoldCrossValidation(partition.get(1).getInstances(),
                                           self.K,
                                           experiment.getParameter().getSeed())
    self.runExperiment(experiment.getClassifier(),
                       experiment.getParameter(),
                       result,
                       crossValidation,
                       partition.get(0))
    return result
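# Illustrative sketch, not part of the library: the splitting scheme used by execute
# above in plain Python. A 25% slice of the shuffled data plays the role of the
# separate test set (partition.get(0)) and the remaining 75% is cut into K folds;
# the data and seed are made up, and stratification is ignored.
import random

def toy_partition(data, test_ratio, k, seed):
    shuffled = list(data)
    random.Random(seed).shuffle(shuffled)
    cut = int(len(shuffled) * test_ratio)
    test_set, rest = shuffled[:cut], shuffled[cut:]
    fold_size = len(rest) // k
    folds = [rest[i * fold_size:(i + 1) * fold_size] for i in range(k)]
    return test_set, folds

test_set, folds = toy_partition(range(20), 0.25, 5, seed=1)
print(len(test_set), [len(fold) for fold in folds])    # 5 held-out instances, 5 folds of 3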
def __testStatistic(self, classifier1: ExperimentPerformance, classifier2: ExperimentPerformance):
    if classifier1.numberOfExperiments() != classifier2.numberOfExperiments():
        raise StatisticalTestNotApplicable("In order to apply a paired test, you need to have the same number of "
                                           "experiments in both algorithms.")
    if classifier1.numberOfExperiments() != 10:
        raise StatisticalTestNotApplicable("In order to apply a 5x2 test, you need to have 10 experiments.")
    # Numerator: sum of the squared error-rate differences over all 10 folds.
    numerator = 0
    difference = []
    for i in range(classifier1.numberOfExperiments()):
        difference.append(classifier1.getErrorRate(i) - classifier2.getErrorRate(i))
        numerator += difference[i] * difference[i]
    # Denominator: twice the sum of the per-replication variances; replication i
    # consists of the two folds 2i and 2i + 1.
    denominator = 0
    for i in range(classifier1.numberOfExperiments() // 2):
        mean = (difference[2 * i] + difference[2 * i + 1]) / 2
        variance = (difference[2 * i] - mean) * (difference[2 * i] - mean) \
                   + (difference[2 * i + 1] - mean) * (difference[2 * i + 1] - mean)
        denominator += variance
    denominator *= 2
    if denominator == 0:
        raise StatisticalTestNotApplicable("Variance is 0.")
    return numerator / denominator
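# Illustrative sketch, not part of the library: the combined 5x2 cv F statistic from
# the method above, computed on made-up error rates. The numerator sums the 10 squared
# differences; the denominator is twice the sum of the 5 per-replication variances.
err_a = [0.20, 0.22, 0.19, 0.25, 0.21, 0.24, 0.18, 0.23, 0.20, 0.22]   # hypothetical
err_b = [0.24, 0.21, 0.23, 0.26, 0.25, 0.22, 0.24, 0.25, 0.23, 0.26]   # hypothetical

d = [a - b for a, b in zip(err_a, err_b)]
numerator = sum(di * di for di in d)
denominator = 0.0
for i in range(5):                                     # 5 replications of 2-fold CV
    mean = (d[2 * i] + d[2 * i + 1]) / 2
    denominator += (d[2 * i] - mean) ** 2 + (d[2 * i + 1] - mean) ** 2
denominator *= 2
print(numerator / denominator)                         # F statistic with dof (10, 5)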
def __testStatistic(self, classifier1: ExperimentPerformance, classifier2: ExperimentPerformance):
    if classifier1.numberOfExperiments() != classifier2.numberOfExperiments():
        raise StatisticalTestNotApplicable("In order to apply a paired test, you need to have the same number of "
                                           "experiments in both algorithms.")
    # Mean of the paired error-rate differences.
    difference = []
    total = 0
    for i in range(classifier1.numberOfExperiments()):
        difference.append(classifier1.getErrorRate(i) - classifier2.getErrorRate(i))
        total += difference[i]
    mean = total / classifier1.numberOfExperiments()
    # Sample standard deviation of the differences.
    total = 0
    for i in range(classifier1.numberOfExperiments()):
        total += (difference[i] - mean) * (difference[i] - mean)
    standardDeviation = math.sqrt(total / (classifier1.numberOfExperiments() - 1))
    if standardDeviation == 0:
        raise StatisticalTestNotApplicable("Variance is 0.")
    # t = sqrt(n) * mean / standard deviation, with n - 1 degrees of freedom.
    return math.sqrt(classifier1.numberOfExperiments()) * mean / standardDeviation
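# Illustrative sketch, not part of the library: the paired t statistic from the method
# above on made-up error rates, i.e. t = sqrt(n) * mean(d) / std(d) where std uses the
# sample formula with n - 1 in the denominator.
import math

err_a = [0.20, 0.22, 0.19, 0.25, 0.21]                 # hypothetical error rates
err_b = [0.24, 0.21, 0.23, 0.26, 0.25]                 # hypothetical error rates

d = [a - b for a, b in zip(err_a, err_b)]
n = len(d)
mean = sum(d) / n
std = math.sqrt(sum((di - mean) ** 2 for di in d) / (n - 1))
print(math.sqrt(n) * mean / std)                       # compared against t with n - 1 dof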
def execute(self, experiment: Experiment) -> ExperimentPerformance:
    result = ExperimentPerformance()
    # Repeat the stratified K-fold run with a separate test set M times,
    # accumulating all fold results in a single ExperimentPerformance.
    for j in range(self.M):
        instanceList = experiment.getDataSet().getInstanceList()
        partition = Partition(instanceList, 0.25, experiment.getParameter().getSeed(), True)
        crossValidation = StratifiedKFoldCrossValidation(Partition(partition.get(1)).getLists(),
                                                         self.K,
                                                         experiment.getParameter().getSeed())
        self.runExperiment(experiment.getClassifier(),
                           experiment.getParameter(),
                           result,
                           crossValidation,
                           partition.get(0))
    return result
def execute(self, experiment: Experiment) -> ExperimentPerformance:
    """
    Execute the bootstrap run with the given classifier on the given data set using the given parameters.

    PARAMETERS
    ----------
    experiment : Experiment
        Experiment to be run.

    RETURNS
    -------
    ExperimentPerformance
        An ExperimentPerformance instance.
    """
    result = ExperimentPerformance()
    for i in range(self.__numberOfBootstraps):
        bootstrap = Bootstrap(experiment.getDataSet().getInstances(), i + experiment.getParameter().getSeed())
        bootstrapSample = InstanceList(bootstrap.getSample())
        experiment.getClassifier().train(bootstrapSample, experiment.getParameter())
        result.add(experiment.getClassifier().test(experiment.getDataSet().getInstanceList()))
    return result
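# Illustrative sketch, not part of the library: the bootstrap loop above in plain Python.
# Each round draws a sample of the original size with replacement, seeded with seed + i
# as above; a real run would train on the sample and test on the full data set. The data
# and the number of rounds are made up.
import random

data = list(range(10))                                           # hypothetical instances
seed = 42
for i in range(3):                                               # 3 bootstrap rounds
    sample = random.Random(seed + i).choices(data, k=len(data))  # sampling with replacement
    print(f"round {i}: {len(set(sample))}/{len(data)} distinct instances in the sample")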
def execute(self, experiment: Experiment) -> ExperimentPerformance:
    """
    Execute Stratified K-fold cross-validation with the given classifier on the given data set using the given
    parameters.

    PARAMETERS
    ----------
    experiment : Experiment
        Experiment to be run.

    RETURNS
    -------
    ExperimentPerformance
        An ExperimentPerformance instance.
    """
    result = ExperimentPerformance()
    crossValidation = StratifiedKFoldCrossValidation(experiment.getDataSet().getClassInstances(),
                                                     self.K,
                                                     experiment.getParameter().getSeed())
    self.runExperiment(experiment.getClassifier(),
                       experiment.getParameter(),
                       result,
                       crossValidation)
    return result
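# Illustrative sketch, not part of the library: stratified folding in plain Python.
# Instances are grouped by class label first and each class is dealt across the K folds,
# so every fold keeps roughly the original class proportions; the labels are made up.
from collections import defaultdict

def toy_stratified_folds(labels, k):
    by_class = defaultdict(list)
    for index, label in enumerate(labels):
        by_class[label].append(index)
    folds = [[] for _ in range(k)]
    for indices in by_class.values():
        for position, index in enumerate(indices):
            folds[position % k].append(index)          # round-robin within each class
    return folds

labels = ["spam"] * 6 + ["ham"] * 3
print(toy_stratified_folds(labels, 3))                 # each fold gets 2 spam and 1 ham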
def compare(self, classifier1: ExperimentPerformance, classifier2: ExperimentPerformance) -> StatisticalTestResult:
    if classifier1.numberOfExperiments() != classifier2.numberOfExperiments():
        raise StatisticalTestNotApplicable("In order to apply a paired test, you need to have the same number of "
                                           "experiments in both algorithms.")
    # Count how many runs each classifier wins; ties do not count for either side.
    plus = 0
    minus = 0
    for i in range(classifier1.numberOfExperiments()):
        if classifier1.getErrorRate(i) < classifier2.getErrorRate(i):
            plus = plus + 1
        elif classifier1.getErrorRate(i) > classifier2.getErrorRate(i):
            minus = minus + 1
    total = plus + minus
    if total == 0:
        raise StatisticalTestNotApplicable("Variance is 0.")
    # Binomial tail probability of observing at most `plus` wins when wins and
    # losses are equally likely under the null hypothesis.
    pValue = 0
    for i in range(plus + 1):
        pValue += self.__binomial(total, i) / math.pow(2, total)
    return StatisticalTestResult(pValue, False)
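# Illustrative sketch, not part of the library: the sign-test tail probability from the
# method above, with math.comb standing in for the class's __binomial helper; the win
# counts are made up.
import math

plus, minus = 2, 7                                     # hypothetical wins per classifier
total = plus + minus
p_value = sum(math.comb(total, i) for i in range(plus + 1)) / 2 ** total
print(p_value)                                         # P(at most `plus` wins | fair coin)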
def compare(self, classifier1: ExperimentPerformance, classifier2: ExperimentPerformance) -> StatisticalTestResult:
    # Paired t test: the statistic is checked against a t distribution with
    # numberOfExperiments - 1 degrees of freedom.
    statistic = self.__testStatistic(classifier1, classifier2)
    degreeOfFreedom = classifier1.numberOfExperiments() - 1
    return StatisticalTestResult(Distribution.tDistribution(statistic, degreeOfFreedom), False)
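# Illustrative sketch, not part of the library: assuming Distribution.tDistribution
# returns a tail probability, SciPy (an extra dependency used only here) gives an
# equivalent check for a made-up statistic; degrees of freedom are n - 1, e.g. 9 for
# 10 cross-validation runs.
from scipy.stats import t

statistic = -2.1                                            # hypothetical paired t statistic
p_one_tailed = t.sf(abs(statistic), 9)                      # upper-tail probability, 9 dof
print(round(p_one_tailed, 4), round(2 * p_one_tailed, 4))   # one- and two-tailed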