def folds(self, nfolds=10, seed=None):
    """
    Get (training,testing) datasets for cross-validation.

    The dataset is copied and shuffled, then cut into consecutive
    slices of ``ceil(num_instances / nfolds)`` instances. Each slice
    is used once as the testing set, with everything before and after
    it joined to form the matching training set.

    Arguments:

        nfolds (int, optional): Number of folds. Default value is
          10.
        seed (int, optional): Seed value for shuffling
          dataset. When None (the default), a random int
          0 <= x <= 10000 is used. An explicit 0 is honoured.

    Returns:

        list of (Instances,Instances) tuples: Each list element is
          a pair of (training,testing) datasets, respectively. Folds
          whose testing slice would be empty are omitted, so the list
          can be shorter than nfolds.
    """
    # Compare against None explicitly: `seed or ...` would discard a
    # caller-supplied seed of 0 and make the shuffle non-reproducible.
    if seed is None:
        seed = randint(0, 10000)
    rnd = WekaRandom(seed)

    # Shuffle a copy so that self.instances is left untouched.
    instances = WekaInstances.copy_instances(self.instances)
    instances.randomize(rnd)

    total = instances.num_instances
    fold_size = labmath.ceil(self.instances.num_instances / nfolds)

    folds = []
    for i in range(nfolds):
        offset = i * fold_size
        # Exclusive end index of the testing slice. Clamp to the total
        # count (not total - 1) so the last instance is tested too.
        testing_end = min(offset + fold_size, total)
        testing_count = testing_end - offset

        # If there's nothing to test, move on.
        if testing_count < 1:
            continue

        # Ranges are (first index, number of instances) pairs, passed
        # positionally to copy_instances.
        testing_range = (offset, testing_count)
        left_range = (0, offset)
        right_range = (testing_end, total - testing_end)

        # Create testing and training folds.
        testing = WekaInstances.copy_instances(instances, *testing_range)
        left = WekaInstances.copy_instances(instances, *left_range)
        right = WekaInstances.copy_instances(instances, *right_range)
        training = WekaInstances.append_instances(left, right)

        # Add fold to collection.
        folds.append((training, testing))

    return folds
def test_ceil_bad_params(self):
    """Check that labmath.ceil raises TypeError for non-numeric input."""
    invalid_inputs = (None, "abc")
    for bad_value in invalid_inputs:
        # Each input gets its own assertRaises context, checked in order.
        with self.assertRaises(TypeError):
            self._test(None, labmath.ceil(bad_value))
def test_ceil(self):
    """Check the return type and rounded-up values of labmath.ceil."""
    # The result for an integer input must itself be an int.
    result_type = type(labmath.ceil(1))
    self._test(int, result_type)

    # (expected, input) pairs, checked in order.
    expectations = (
        (1, 1),
        (2, 1.1),
        (3, 2.5),
        (4, 3.9),
    )
    for expected, value in expectations:
        self._test(expected, labmath.ceil(value))