def read_dataset():
    """Build the train/test pair from ``sets.Ocr()``.

    Steps, in order:
      1. Instantiate ``sets.Ocr()``.
      2. Apply ``sets.OneHot`` (``depth=2``) to the ``target`` column.
      3. Collapse the last two axes of ``data`` into one and cast to float.
      4. Pass the result through ``sets.Split(0.66)``.

    Returns:
        tuple: ``(train, test)`` as produced by ``sets.Split(0.66)``.
    """
    ocr = sets.Ocr()

    # The encoder is configured from the raw targets, then applied to the
    # 'target' column of the same dataset.
    encoder = sets.OneHot(ocr.target, depth=2)
    ocr = encoder(ocr, columns=['target'])

    # Keep every leading axis and merge the trailing two into a single one.
    images = ocr.data
    flat_shape = images.shape[:-2] + (-1, )
    ocr['data'] = images.reshape(flat_shape).astype(float)

    splitter = sets.Split(0.66)
    train, test = splitter(ocr)
    return train, test
def test_split(dataset):
    """Check that ``sets.Split(0.5)`` partitions ``dataset`` without loss.

    The two parts must together have as many entries as ``dataset``, and
    concatenating their ``data`` / ``target`` columns (first part, then
    second part) must reproduce the original columns element for element.
    """
    first_part, second_part = sets.Split(0.5)(dataset)

    combined_size = len(first_part) + len(second_part)
    assert combined_size == len(dataset)

    # Re-join both columns before comparing anything against the original.
    joined_data = np.concatenate((first_part.data, second_part.data))
    joined_target = np.concatenate((first_part.target, second_part.target))

    data_matches = joined_data == dataset.data
    assert data_matches.all()

    target_matches = joined_target == dataset.target
    assert target_matches.all()
def get_dataset():
    """Read the dataset, flatten its images and split it in two.

    ``sets.OneHot`` (``depth=2``) is applied to the ``target`` column, the
    last two axes of ``data`` are merged into one (cast to float), and the
    result is divided by ``sets.Split(0.66)``.

    Returns:
        tuple: ``(train, test)`` as produced by ``sets.Split(0.66)``.
    """
    raw = sets.Ocr()
    to_one_hot = sets.OneHot(raw.target, depth=2)
    encoded = to_one_hot(raw, columns=['target'])

    # shape[:-2] keeps the leading axes; -1 lets reshape infer the merged one.
    leading_axes = encoded.data.shape[:-2]
    flattened = encoded.data.reshape(leading_axes + (-1, ))
    encoded['data'] = flattened.astype(float)

    train, test = sets.Split(0.66)(encoded)
    return train, test
def split(full, num):
    """Return the num-th partition of a 10-fold cross validation.

    The rows of ``full`` are rotated by ``num * (len(full) // 10)`` positions
    (rows pushed past the end wrap around to the front), and the rotated
    dataset is then divided by ``sets.Split(0.9)``.

    Args:
        full: ``sets.core.dataset.Dataset`` object providing ``data``,
            ``target`` and ``length`` array columns.
        num: Index of the fold, expected to be in the range 0 to 9.

    Returns:
        tuple: ``(train, validation)`` as produced by ``sets.Split(0.9)``.
    """
    validation_size = len(full) // 10

    # A non-positive offset means "no rotation": this keeps the behaviour of
    # the previous element-by-element loop, which simply did not run then.
    shift = max(num * validation_size, 0)

    # np.roll along axis 0 moves the last `shift` rows to the front in one
    # vectorized step, replacing a quadratic pop()/insert(0, ...) loop over
    # Python lists. All three columns get the same shift so rows stay aligned.
    data = np.roll(full.data, shift, axis=0)
    target = np.roll(full.target, shift, axis=0)
    length = np.roll(full.length, shift, axis=0)

    # Build a new dataset instead of touching the caller's object.
    rotated = sets.core.dataset.Dataset(data=data, target=target)
    rotated['length'] = length

    train, validation = sets.Split(0.9)(rotated)
    return train, validation