def files_and_annotations(self):
    """Return the file list and label list for the configured subset.

    The "single" and "multi" sources are loaded separately, and each one is
    split into train/test partitions with ``test_size`` 0.1 and a fixed seed.
    The partitions matching ``self.subset`` are concatenated (multi first,
    then single) and passed through ``shuffle`` with a fixed seed.

    Returns:
        tuple: ``(files, labels)`` as returned by ``shuffle``.
    """
    single_split_rate = 0.1
    multi_split_rate = 0.1

    single_files, single_labels = self._single_files_and_annotations()
    multi_files, multi_labels = self._multi_files_and_annotations()

    single_train_x, single_test_x, single_train_y, single_test_y = train_test_split(
        single_files, single_labels, test_size=single_split_rate, seed=1)
    multi_train_x, multi_test_x, multi_train_y, multi_test_y = train_test_split(
        multi_files, multi_labels, test_size=multi_split_rate, seed=1)

    # Any subset other than "train" is served from the held-out partitions.
    use_train = self.subset == "train"
    picked_files = (multi_train_x + single_train_x) if use_train else (multi_test_x + single_test_x)
    picked_labels = (multi_train_y + single_train_y) if use_train else (multi_test_y + single_test_y)

    picked_files, picked_labels = shuffle(picked_files, picked_labels, seed=1)

    print("files and annotations are ready")
    return picked_files, picked_labels
def test_train_test_split():
    """Check partition sizes, pairing and seed behaviour of train_test_split."""
    num_samples = 40
    seed = random.randint(1, 100)
    test_size = 0.714
    num_train = int(num_samples * (1 - test_size))
    num_test = num_samples - num_train

    a = np.random.random((num_samples, 3, 5))
    b = np.random.random((num_samples, 6, 9))

    train_a, test_a, train_b, test_b = train_test_split(a, b, test_size=test_size, seed=seed)

    # Partition sizes follow test_size and the trailing dimensions are kept.
    assert train_a.shape == (num_train, 3, 5)
    assert train_b.shape == (num_train, 6, 9)
    assert len(train_a) == len(train_b)

    assert test_a.shape == (num_test, 3, 5)
    assert test_b.shape == (num_test, 6, 9)
    assert len(test_a) == len(test_b)

    # The same seed must reproduce exactly the same partitions.
    again_train_a, again_test_a, again_train_b, again_test_b = train_test_split(
        a, b, test_size=test_size, seed=seed)
    repeated_pairs = (
        (train_a, again_train_a),
        (train_b, again_train_b),
        (test_a, again_test_a),
        (test_b, again_test_b),
    )
    for first, second in repeated_pairs:
        assert np.all(first == second)

    # Seed 0 is outside the randint(1, 100) range, so it always differs.
    other_train_a, other_test_a, other_train_b, other_test_b = train_test_split(
        a, b, test_size=test_size, seed=0)
    differing_pairs = (
        (train_a, other_train_a),
        (train_b, other_train_b),
        (test_a, other_test_a),
        (test_b, other_test_b),
    )
    for first, second in differing_pairs:
        assert not np.all(first == second)
def files(self):
    """Return the image files for the configured subset.

    When ``self.validation_size`` is greater than zero, the full file list is
    split with a fixed seed; the "train" subset gets the train partition and
    any other subset gets the held-out partition. Otherwise the full list is
    returned unchanged.
    """
    image_files = self._all_files()

    # Without a validation fraction there is nothing to hold out.
    if not (self.validation_size > 0):
        return image_files

    train_part, held_out_part = train_test_split(
        image_files, test_size=self.validation_size, seed=1)

    return train_part if self.subset == "train" else held_out_part
def files_and_annotations(self):
    """Return the image files and their annotation files for the subset.

    All ``*.jpg`` files directly under ``self.image_dir`` are split into
    train/test partitions with ``test_size`` 0.1 and a fixed seed. The
    "train" subset uses the train partition; any other subset uses the test
    partition.

    Each annotation path is derived from its image path by swapping the
    leading ``self.image_dir`` for ``self.annotation_dir`` and the file
    extension for ``.png``.

    Returns:
        tuple: ``(image_files, label_files)``; the label list has one entry
        per image, in the same order.
    """
    import os  # local import keeps this method self-contained

    split_rate = 0.1
    all_image_files = glob.glob(self.image_dir + "/*.jpg")

    train_image_files, test_image_files = train_test_split(
        all_image_files, test_size=split_rate, seed=1)

    image_files = train_image_files if self.subset == "train" else test_image_files

    # Only the leading directory and the extension are rewritten. A plain
    # substring replace would also rewrite "jpg" (or the directory name)
    # wherever it appears inside a directory or file name.
    label_files = [
        os.path.splitext(
            image_path.replace(self.image_dir, self.annotation_dir, 1)
        )[0] + ".png"
        for image_path in image_files
    ]

    print("files and annotations are ready")
    return image_files, label_files
def files_and_annotations(self):
    """Return the files and annotations for the configured subset.

    When ``self.validation_size`` is greater than zero, the data is split
    with a fixed seed: "train" gets the train partition and "validation"
    gets the held-out partition. In every other case the unsplit lists are
    returned.

    Returns:
        tuple: ``(files, annotations)``.
    """
    all_files, all_annotations = self._files_and_annotations()

    if not (self.validation_size > 0):
        return all_files, all_annotations

    train_files, held_out_files, train_annotations, held_out_annotations = train_test_split(
        all_files, all_annotations, test_size=self.validation_size, seed=1)

    if self.subset == "train":
        return train_files, train_annotations

    if self.subset == "validation":
        return held_out_files, held_out_annotations

    # Subsets other than "train"/"validation" keep the unsplit lists.
    return all_files, all_annotations
def _files_and_annotations(self):
    """Return the files and labels for the configured subset.

    Files and labels are loaded from ``self.json`` and split into train/test
    partitions with ``test_size`` 0.1 and a fixed seed. The "train" subset
    gets the train partition; any other subset gets the test partition.

    Returns:
        tuple: ``(files, labels)``.
    """
    single_split_rate = 0.1

    all_files, all_labels = self._files_and_annotations_from_json(self.json)
    train_files, test_files, train_labels, test_labels = train_test_split(
        all_files, all_labels, test_size=single_split_rate, seed=1)

    picked = (
        (train_files, train_labels)
        if self.subset == "train"
        else (test_files, test_labels)
    )

    print("files and annotations are ready")
    return picked
def files_and_annotations(self):
    """Return image and annotation file list.

    The list file is ``train.txt`` when the subset is "train" or when a
    validation fraction is configured, and ``val.txt`` otherwise. Each row
    holds two whitespace-separated columns, an image path and a label path,
    and both are joined onto ``self.data_dir``.

    If ``self.validation_size`` is greater than zero, the entries read from
    ``train.txt`` are split into train and test lists with a fixed seed, and
    the partition matching ``self.subset`` is kept.

    Returns:
        tuple: ``(image_files, label_files)`` as returned by ``shuffle``.
    """
    if self.subset == "train" or self.validation_size > 0:
        text = "train.txt"
    else:
        text = "val.txt"

    filename = os.path.join(self.data_dir, text)
    # sep=r"\s+" is the documented replacement for the deprecated
    # delim_whitespace=True option of pandas.read_csv.
    df = pd.read_csv(
        filename,
        sep=r"\s+",
        header=None,
        names=['image_files', 'label_files'],
    )

    image_files = [
        os.path.join(self.data_dir, relative_path)
        for relative_path in df.image_files.tolist()
    ]
    label_files = [
        os.path.join(self.data_dir, relative_path)
        for relative_path in df.label_files.tolist()
    ]

    if self.validation_size > 0:
        train_image_files, test_image_files, train_label_files, test_label_files = \
            train_test_split(image_files, label_files, test_size=self.validation_size, seed=1)
        if self.subset == "train":
            image_files = train_image_files
            label_files = train_label_files
        else:
            image_files = test_image_files
            label_files = test_label_files

    # No seed is passed here, unlike the split above.
    image_files, label_files = shuffle(image_files, label_files)
    print("files and annotations are ready")

    return image_files, label_files
def files_and_annotations(self):
    """Return the files and annotations for the configured subset.

    Everything is loaded from ``self.path['json']`` and ``self.path['dir']``.
    When ``self.validation_size`` is greater than zero, the data is split
    with a fixed seed: "train" gets the train partition and any other subset
    gets the held-out partition. Otherwise the full lists are returned.

    Returns:
        tuple: ``(files, annotations)``.
    """
    all_files, all_annotations = self._files_and_annotations(
        self.path['json'], self.path['dir'])

    # Without a validation fraction there is nothing to hold out.
    if not (self.validation_size > 0):
        return all_files, all_annotations

    train_files, held_out_files, train_annotations, held_out_annotations = train_test_split(
        all_files, all_annotations, test_size=self.validation_size, seed=1)

    if self.subset == "train":
        return train_files, train_annotations
    return held_out_files, held_out_annotations
def files_and_annotations(self):
    """Return the image files and their annotation files for the subset.

    Files directly under ``self.image_dir`` matching ``*.jpg``, ``*.png``,
    ``*.JPG`` or ``*.jpeg`` are collected in that order and split into
    train/test partitions with ``test_size`` 0.2 and a fixed seed. The
    "train" subset uses the train partition; any other subset uses the test
    partition.

    Each annotation path is derived from its image path by swapping the
    leading ``self.image_dir`` for ``self.annotation_dir`` and the file
    extension for ``.png``.

    Returns:
        tuple: ``(image_files, label_files)``; the label list has one entry
        per image, in the same order.
    """
    import os  # local import keeps this method self-contained

    split_rate = 0.2
    all_image_files = [
        image_path
        for extension in ("jpg", "png", "JPG", "jpeg")
        for image_path in glob.glob(self.image_dir + "/*." + extension)
    ]

    train_image_files, test_image_files = train_test_split(
        all_image_files, test_size=split_rate, seed=1)

    image_files = train_image_files if self.subset == "train" else test_image_files

    # Only the leading directory and the extension are rewritten. A plain
    # substring replace would also rewrite "jpg"/"JPG"/"jpeg" (or the
    # directory name) wherever it appears inside a directory or file name.
    label_files = [
        os.path.splitext(
            image_path.replace(self.image_dir, self.annotation_dir, 1)
        )[0] + ".png"
        for image_path in image_files
    ]

    print("files and annotations are ready", self.subset, len(image_files), len(label_files))
    return image_files, label_files