def test_shuffle_diff_length():
    """Check that shuffle raises when its inputs differ in length."""
    long_array = np.random.random((40, 3, 5))
    short_array = np.random.random((20, 6, 9))

    # Two arrays whose first dimensions disagree (40 vs 20).
    with pytest.raises(Exception):
        result_a, result_b = shuffle(long_array, short_array)

    # An array paired with a plain list of a different length (40 vs 15).
    short_list = list(range(15))
    with pytest.raises(Exception):
        result_a, result_c = shuffle(long_array, short_list)
def _images_and_labels(self):
    """Load the images and labels that belong to ``self.subset``.

    The ``"train"`` and ``"train_validation_saving"`` subsets are both read
    from the ``"train"`` file; every other subset is read from ``"test"``.

    When ``self.train_validation_saving_size`` is positive, the last
    ``train_validation_saving_size`` samples of the train file are held out:
    ``"train"`` keeps everything before them and
    ``"train_validation_saving"`` keeps only them.

    Returns:
        tuple: ``(images, labels)``. For the ``"train"`` subset both are
        passed through ``shuffle`` with ``seed=0``.
    """
    subset = self.subset
    from_train_file = subset in ("train", "train_validation_saving")
    files = ["train"] if from_train_file else ["test"]

    loaded = [self._load_data(name) for name in files]
    images = np.concatenate([imgs for imgs, _ in loaded], axis=0)
    labels = np.concatenate([lbls for _, lbls in loaded], axis=0)

    holdout = self.train_validation_saving_size
    if holdout > 0 and from_train_file:
        # Cut once at ``-holdout``: the head is the training part, the tail
        # is the train_validation_saving part.
        head_images, tail_images = np.split(images, [-holdout], axis=0)
        head_labels, tail_labels = np.split(labels, [-holdout], axis=0)
        if subset == "train":
            images, labels = head_images, head_labels
        else:
            images, labels = tail_images, tail_labels

    # Only the training subset is randomized, with a fixed seed.
    if subset == "train":
        images, labels = shuffle(images, labels, seed=0)

    return images, labels
def files_and_annotations(self):
    """Return the image file list and label file list for ``self.subset``.

    Reads ``train.txt`` (subset ``"train"``) or ``val.txt`` (subset
    ``"validation"``) from ``self.data_dir``. Each row holds two
    whitespace-separated columns: an image path and a label path. The
    ``/SegNet/CamVid`` part of every path is replaced with
    ``self.data_dir``.

    Returns:
        tuple: ``(image_files, label_files)`` as returned by ``shuffle``
        when both lists are passed to it together.

    Raises:
        ValueError: If ``self.subset`` is neither ``"train"`` nor
            ``"validation"``.
    """
    if self.subset == "train":
        text = "train.txt"
    elif self.subset == "validation":
        text = "val.txt"
    else:
        # Fail with a clear message instead of an UnboundLocalError below.
        raise ValueError(
            "Unsupported subset {!r}: expected 'train' or 'validation'".format(self.subset)
        )

    filename = os.path.join(self.data_dir, text)
    # sep=r"\s+" is the supported spelling of the deprecated
    # delim_whitespace=True option.
    df = pd.read_csv(
        filename,
        sep=r"\s+",
        header=None,
        names=['image_files', 'label_files'],
    )

    image_files = [
        path.replace("/SegNet/CamVid", self.data_dir)
        for path in df.image_files.tolist()
    ]
    label_files = [
        path.replace("/SegNet/CamVid", self.data_dir)
        for path in df.label_files.tolist()
    ]

    image_files, label_files = shuffle(image_files, label_files)
    print("files and annotations are ready")

    return image_files, label_files
def files_and_annotations(self):
    """Return the image file list and label file list for ``self.subset``.

    Reads ``train.txt`` (subset ``"train"``) or ``val.txt`` (subset
    ``"validation"``) from ``self.data_dir``. Each non-blank line holds an
    image path followed by a label path, separated by whitespace. The
    ``/SegNet/CamVid`` part of every path is replaced with
    ``self.data_dir``.

    Returns:
        tuple: ``(image_files, label_files)`` as returned by ``shuffle``
        when both lists are passed to it together.

    Raises:
        ValueError: If ``self.subset`` is neither ``"train"`` nor
            ``"validation"``.
        IndexError: If a non-blank line has fewer than two fields.
    """
    if self.subset == "train":
        text = "train.txt"
    elif self.subset == "validation":
        text = "val.txt"
    else:
        # Fail with a clear message instead of an UnboundLocalError below.
        raise ValueError(
            "Unsupported subset {!r}: expected 'train' or 'validation'".format(self.subset)
        )

    filename = os.path.join(self.data_dir, text)

    image_files, label_files = [], []
    with open(filename) as f:
        for line in f:
            items = line.split()
            if not items:
                # Blank lines (e.g. a trailing newline) carry no entry.
                continue
            image_files.append(items[0].replace("/SegNet/CamVid", self.data_dir))
            label_files.append(items[1].replace("/SegNet/CamVid", self.data_dir))

    image_files, label_files = shuffle(image_files, label_files)
    print("files and annotations are ready")

    return image_files, label_files
def test_shuffle_range():
    """Check that shuffle turns a range into a reordered list of equal length."""
    range_list = range(40)
    seed = random.randint(1, 100)

    result = shuffle(range_list, seed=seed)

    assert len(result) == len(range_list)
    assert isinstance(result, list)
    # Compare list with list: a list never equals a range object, so
    # ``result != range_list`` would pass even if nothing was shuffled.
    assert result != list(range_list)
def _images_and_labels(self):
    """Load and stack the images and labels of every file in the dataset.

    Each path from ``self._all_files()`` is loaded with ``self._load_data``
    and the per-file arrays are joined along the first axis.

    Returns:
        tuple: ``(images, labels)``. With no files, ``images`` has shape
        ``(0, image_size, image_size, 3)`` and ``labels`` has shape
        ``(0, num_classes)``. For the ``"train"`` subset both are passed
        through ``shuffle`` with ``seed=0``.
    """
    # Zero-length seed arrays fix the trailing dimensions and take part in
    # dtype promotion, so the result matches a step-by-step concatenation.
    image_chunks = [np.empty([0, self.image_size, self.image_size, 3])]
    label_chunks = [np.empty([0, self.num_classes])]

    for path in self._all_files():
        tmp_images, tmp_labels = self._load_data(path)
        image_chunks.append(tmp_images)
        label_chunks.append(tmp_labels)

    # Concatenate once instead of re-copying the accumulated array per file.
    images = np.concatenate(image_chunks)
    labels = np.concatenate(label_chunks)

    if self.subset == "train":
        images, labels = shuffle(images, labels, seed=0)

    return images, labels
def test_shuffle():
    """Check shuffle on two arrays and a list shuffled together.

    Covers three things: the outputs keep their shapes/lengths and differ
    from the inputs, the same seed reproduces the same result, and
    ``seed=None`` gives a result different from the seeded one.
    """
    num_samples = 40
    a_shape = (num_samples, 3, 5)
    a = np.random.random(a_shape)
    b_shape = (num_samples, 6, 9)
    b = np.random.random(b_shape)
    c = list(range(num_samples))
    seed = random.randint(1, 100)

    # Shuffling keeps shapes/lengths but changes the order.
    result_a, result_b, result_c = shuffle(a, b, c, seed=seed)
    assert result_a.shape == a_shape
    assert result_b.shape == b_shape
    assert len(c) == len(result_c)
    assert not np.all(a == result_a)
    assert not np.all(b == result_b)
    assert c != result_c

    # The same seed must reproduce exactly the same result.
    same_seed_a, same_seed_b, same_seed_c = shuffle(a, b, c, seed=seed)
    assert same_seed_a.shape == a_shape
    assert same_seed_b.shape == b_shape
    assert len(same_seed_c) == len(c)
    assert np.all(same_seed_a == result_a)
    assert np.all(same_seed_b == result_b)
    assert same_seed_c == result_c

    # seed=None must give a result different from the seeded one.
    diff_seed_a, diff_seed_b, diff_seed_c = shuffle(a, b, c, seed=None)
    assert diff_seed_a.shape == a_shape
    assert diff_seed_b.shape == b_shape
    assert len(diff_seed_c) == len(c)
    assert not np.all(diff_seed_a == result_a)
    assert not np.all(diff_seed_b == result_b)
    # Check the list from THIS call; the original re-checked same_seed_c.
    assert diff_seed_c != result_c
def _images_and_labels(self):
    """Load and stack the images and labels of every file for ``self.subset``.

    ``self._load_data(path)`` is expected to return
    ``((train_images, train_labels), (test_images, test_labels))``. The
    train pair is used for the ``"train"`` subset and the test pair for any
    other subset; the per-file arrays are joined along the first axis.

    Returns:
        tuple: ``(images, labels)``. With no files, ``images`` has shape
        ``(0, image_size, image_size)`` and ``labels`` has shape ``(0,)``.
        For the ``"train"`` subset both are passed through ``shuffle`` with
        ``seed=0``.
    """
    is_train = self.subset == "train"

    # Zero-length seed arrays fix the trailing dimensions and take part in
    # dtype promotion, so the result matches a step-by-step concatenation.
    image_chunks = [np.empty([0, self.image_size, self.image_size])]
    label_chunks = [np.empty([0])]

    for path in self._all_files():
        (train_imgs, train_lbls), (test_imgs, test_lbls) = self._load_data(path)
        if is_train:
            image_chunks.append(train_imgs)
            label_chunks.append(train_lbls)
        else:
            image_chunks.append(test_imgs)
            label_chunks.append(test_lbls)

    # Concatenate once instead of re-copying the accumulated array per file.
    images = np.concatenate(image_chunks)
    labels = np.concatenate(label_chunks)

    if is_train:
        images, labels = shuffle(images, labels, seed=0)

    return images, labels
def _images_and_labels(self):
    """Load the images and labels of the batch files for ``self.subset``.

    The ``"train"`` subset reads ``data_batch_1`` through ``data_batch_5``;
    any other subset reads ``test_batch``. Per-file arrays are joined along
    axis 0.

    Returns:
        tuple: ``(images, labels)``. For the ``"train"`` subset both are
        passed through ``shuffle`` with ``seed=0``.
    """
    is_train = self.subset == "train"

    batch_names = ["test_batch"]
    if is_train:
        batch_names = [
            "data_batch_1",
            "data_batch_2",
            "data_batch_3",
            "data_batch_4",
            "data_batch_5",
        ]

    loaded = [self._load_data(name) for name in batch_names]
    images = np.concatenate([batch_images for batch_images, _ in loaded], axis=0)
    labels = np.concatenate([batch_labels for _, batch_labels in loaded], axis=0)

    # Only the training subset is randomized, with a fixed seed.
    if is_train:
        images, labels = shuffle(images, labels, seed=0)

    return images, labels
def files_and_annotations(self):
    """Return image and annotation file list.

    ``train.txt`` is read when the subset is ``"train"`` or when
    ``self.validation_size`` is positive; otherwise ``val.txt`` is read.
    Each row holds two whitespace-separated columns (image path, label
    path), and every path is joined onto ``self.data_dir``.

    When ``self.validation_size`` is positive, the list read from
    ``train.txt`` is divided with ``train_test_split`` (``seed=1``): the
    ``"train"`` subset keeps the train part and any other subset keeps the
    test part.

    Returns:
        tuple: ``(image_files, label_files)`` as returned by ``shuffle``
        when both lists are passed to it together.
    """
    if self.subset == "train" or self.validation_size > 0:
        text = "train.txt"
    else:
        text = "val.txt"

    filename = os.path.join(self.data_dir, text)
    # sep=r"\s+" is the supported spelling of the deprecated
    # delim_whitespace=True option.
    df = pd.read_csv(
        filename,
        sep=r"\s+",
        header=None,
        names=['image_files', 'label_files'],
    )

    image_files = [
        os.path.join(self.data_dir, path) for path in df.image_files.tolist()
    ]
    label_files = [
        os.path.join(self.data_dir, path) for path in df.label_files.tolist()
    ]

    if self.validation_size > 0:
        train_image_files, test_image_files, train_label_files, test_label_files = \
            train_test_split(image_files,
                             label_files,
                             test_size=self.validation_size,
                             seed=1)
        if self.subset == "train":
            image_files = train_image_files
            label_files = train_label_files
        else:
            image_files = test_image_files
            label_files = test_label_files

    image_files, label_files = shuffle(image_files, label_files)
    print("files and annotations are ready")

    return image_files, label_files
def files_and_annotations(self):
    """Return image and annotation file list.

    ``train.txt`` is read when the subset is ``"train"`` or when
    ``self.validation_size`` is positive; otherwise ``val.txt`` is read.
    Each non-blank line holds an image path followed by a label path,
    separated by whitespace, and every path is joined onto
    ``self.data_dir``.

    When ``self.validation_size`` is positive, the list read from
    ``train.txt`` is divided with ``train_test_split`` (``seed=1``): the
    ``"train"`` subset keeps the train part and any other subset keeps the
    test part.

    Returns:
        tuple: ``(image_files, label_files)`` as returned by ``shuffle``
        when both lists are passed to it together.

    Raises:
        IndexError: If a non-blank line has fewer than two fields.
    """
    if self.subset == "train" or self.validation_size > 0:
        text = "train.txt"
    else:
        text = "val.txt"

    filename = os.path.join(self.data_dir, text)

    image_files, label_files = [], []
    with open(filename) as f:
        for line in f:
            items = line.split()
            if not items:
                # Blank lines (e.g. a trailing newline) carry no entry.
                continue
            image_files.append(os.path.join(self.data_dir, items[0]))
            label_files.append(os.path.join(self.data_dir, items[1]))

    if self.validation_size > 0:
        train_image_files, test_image_files, train_label_files, test_label_files = \
            train_test_split(image_files,
                             label_files,
                             test_size=self.validation_size,
                             seed=1)
        if self.subset == "train":
            image_files = train_image_files
            label_files = train_label_files
        else:
            image_files = test_image_files
            label_files = test_label_files

    image_files, label_files = shuffle(image_files, label_files)
    print("files and annotations are ready")

    return image_files, label_files