예제 #1
0
    def test__set_dataframe(self):
        """``set_dataframe`` must raise ``PartitionError`` for an empty frame."""
        no_rows_frame = pd.DataFrame()
        partitioner = KFoldPartitioning(n_splits=2)

        # Callable form of assertRaises: the call itself is expected to fail.
        self.assertRaises(
            PartitionError, partitioner.set_dataframe, no_rows_frame)
예제 #2
0
    def test_iter(self):
        """Check that each split yielded by the partitioner is a partition.

        A default ``KFoldPartitioning`` is loaded with ``original_frame`` and
        iterated. For every ``(train, test)`` pair the two parts must share
        no row, and together they must hold exactly the rows of
        ``original_frame`` (order is ignored). The test also fails if the
        iteration yields no split at all.
        """

        def rows_of(frame):
            # Rows as plain lists so they can be compared with ``in``.
            return [list(row) for row in frame.itertuples(index=False)]

        kf = KFoldPartitioning()

        kf.set_dataframe(original_frame)

        # The reference rows do not depend on the split: build them once
        # instead of on every iteration.
        original_list = rows_of(original_frame)

        splits_seen = 0
        for train, test in kf:
            splits_seen += 1

            train_list = rows_of(train)
            test_list = rows_of(test)

            # Check that train and test are a partition
            train_not_in_test = [
                row for row in train_list if row not in test_list
            ]
            self.assertCountEqual(
                train_list, train_not_in_test)  # Count so regardless of order
            test_not_in_train = [
                row for row in test_list if row not in train_list
            ]
            self.assertCountEqual(
                test_list, test_not_in_train)  # Count so regardless of order

            # Check that the union of the two give the original data
            union_list = train_list + test_list
            self.assertCountEqual(original_list,
                                  union_list)  # Count so regardless of order

        # Without this guard an iterator that yields nothing would make the
        # loop body (and every assertion in it) silently never run.
        self.assertGreater(splits_seen, 0)