def test__set_dataframe(self):
    """``set_dataframe`` must raise ``PartitionError`` for an empty frame."""
    partitioner = KFoldPartitioning(n_splits=2)
    no_rows = pd.DataFrame()

    # Only the call under test sits inside the context manager so that an
    # error raised during setup cannot satisfy the assertion by accident.
    with self.assertRaises(PartitionError):
        partitioner.set_dataframe(no_rows)
def test_iter(self):
    """Check that every fold produced by iteration partitions the frame.

    For each ``(train, test)`` pair obtained by iterating a
    ``KFoldPartitioning`` built with default arguments, the two frames
    must share no row and, taken together, must contain exactly the rows
    of ``original_frame``. At least one fold must be produced.
    """
    kf = KFoldPartitioning()
    kf.set_dataframe(original_frame)

    # The reference rows are the same for every fold, so build them once
    # rather than on each iteration.
    original_list = [
        list(row) for row in original_frame.itertuples(index=False)
    ]

    fold_count = 0
    for fold_count, (train, test) in enumerate(kf, start=1):
        train_list = [list(row) for row in train.itertuples(index=False)]
        test_list = [list(row) for row in test.itertuples(index=False)]

        # Check that train and test are a partition.
        # NOTE(review): rows are compared by value, so this presumably
        # relies on original_frame having no duplicate rows - confirm.
        train_not_in_test = [
            row for row in train_list if row not in test_list
        ]
        self.assertCountEqual(
            train_list, train_not_in_test)  # Count so regardless of order

        test_not_in_train = [
            row for row in test_list if row not in train_list
        ]
        self.assertCountEqual(
            test_list, test_not_in_train)  # Count so regardless of order

        # Check that the union of the two give the original data
        union_list = train_list + test_list
        self.assertCountEqual(
            original_list, union_list)  # Count so regardless of order

    # All checks above live inside the loop; without this guard an
    # iterator yielding nothing would make the test pass vacuously.
    self.assertGreater(
        fold_count, 0, "iterating the partitioner produced no folds")