예제 #1
0
class ValidationStep:
    """Select a validation strategy from ``params`` and delegate to it.

    Only the ``"kfold"`` strategy is implemented; it is also the default
    when ``params`` has no ``"validation_type"`` entry.
    """

    def __init__(self, params):
        """Create the validator named by ``params["validation_type"]``.

        Args:
            params: Mapping of validation settings. ``"validation_type"``
                is read from it with ``.get`` and the whole mapping is
                passed unchanged to the chosen validator.

        Raises:
            NotImplementedError: If the requested validation type is
                anything other than ``"kfold"``.
        """
        # kfold is the default validation technique
        self.validation_type = params.get("validation_type", "kfold")

        # TODO: support "split" (SplitValidator) and "with_dataset"
        # (WithDatasetValidator), and report truly unknown types with a
        # dedicated exception carrying the offending value.
        if self.validation_type != "kfold":
            # NotImplementedError is still an Exception subclass, so callers
            # that catch Exception keep working.
            raise NotImplementedError(
                "Other validation types are not implemented yet!"
            )

        self.validator = KFoldValidator(params)

    def get_split(self, k):
        """Return whatever the underlying validator's ``get_split(k)`` returns."""
        return self.validator.get_split(k)

    def split(self):
        """Return whatever the underlying validator's ``split()`` returns."""
        return self.validator.split()

    def get_n_splits(self):
        """Return whatever the underlying validator's ``get_n_splits()`` returns."""
        return self.validator.get_n_splits()
 def test_create_with_target_as_labels(self):
     """A shuffled, stratified 2-fold split works with string labels.

     Four samples with targets "a"/"b" are split into two folds; every
     train and validation part must contain exactly two rows.
     """
     features = pd.DataFrame(np.array([[0, 0], [0, 1], [1, 0], [1, 1]]))
     labels = pd.DataFrame(np.array(["a", "b", "a", "b"]))
     data = {"train": {"X": features, "y": labels}}
     params = {"shuffle": True, "stratify": True, "k_folds": 2}

     validator = KFoldValidator(params, data)
     self.assertEqual(params["k_folds"], validator.get_n_splits())

     for train_part, validation_part in validator.split():
         # Fetch all four frames first, then check them in the order
         # train X, train y, validation X, validation y.
         frames = [
             part.get(key)
             for part in (train_part, validation_part)
             for key in ("X", "y")
         ]
         for frame in frames:
             self.assertEqual(frame.shape[0], 2)
 def test_missing_target_values(self):
     """Rows with a missing target are distributed equally among folds.

     Six samples (two of them without a label) are split into two
     shuffled, stratified folds; every train and validation part must
     contain exactly three rows.
     """
     features = pd.DataFrame(
         np.array([[1, 0], [2, 1], [3, 0], [4, 1], [5, 1], [6, 1]])
     )
     # NOTE(review): NumPy coerces this mixed str/float list to a string
     # array, so the last two targets become the text "nan" rather than a
     # float NaN -- confirm the validator treats them as missing.
     targets = pd.DataFrame(np.array(["a", "b", "a", "b", np.nan, np.nan]))
     data = {"train": {"X": features, "y": targets}}
     params = {"shuffle": True, "stratify": True, "k_folds": 2}

     validator = KFoldValidator(params, data)
     self.assertEqual(params["k_folds"], validator.get_n_splits())

     for train_part, validation_part in validator.split():
         # Fetch all four frames first, then check them in the order
         # train X, train y, validation X, validation y.
         frames = [
             part.get(key)
             for part in (train_part, validation_part)
             for key in ("X", "y")
         ]
         for frame in frames:
             self.assertEqual(frame.shape[0], 3)