def files_and_annotations(self):
        """Return the files and labels belonging to the current subset.

        The single-label and multi-label sources are loaded separately and each
        one is divided by ``train_test_split`` with a 10% test share and
        ``seed=1``. For the ``"train"`` subset the train portions are joined
        with ``+`` (multi first, then single); for any other subset the test
        portions are joined the same way. The joined pair is finally passed
        through ``shuffle`` with ``seed=1``.

        Returns:
            The ``(files, labels)`` pair produced by ``shuffle``.
        """
        single_test_rate = 0.1
        multi_test_rate = 0.1

        single_files, single_labels = self._single_files_and_annotations()
        multi_files, multi_labels = self._multi_files_and_annotations()

        # train_test_split yields (train_files, test_files, train_labels, test_labels).
        single_train_files, single_test_files, single_train_labels, single_test_labels = train_test_split(
            single_files, single_labels, test_size=single_test_rate, seed=1)
        multi_train_files, multi_test_files, multi_train_labels, multi_test_labels = train_test_split(
            multi_files, multi_labels, test_size=multi_test_rate, seed=1)

        wants_train = self.subset == "train"
        files = (multi_train_files + single_train_files) if wants_train \
            else (multi_test_files + single_test_files)
        labels = (multi_train_labels + single_train_labels) if wants_train \
            else (multi_test_labels + single_test_labels)

        # Fixed seed so the mixed multi/single order is the same on every call.
        files, labels = shuffle(files, labels, seed=1)

        print("files and annotations are ready")
        return files, labels
Exemple #2
0
def test_train_test_split():
    """Check output sizes and seed behaviour of ``train_test_split``.

    Two random arrays with the same number of samples are split together.
    The test verifies that:

    * train and test parts have the expected leading dimension and keep
      the trailing dimensions of their source array,
    * repeating the call with the same seed reproduces the same split,
    * calling with a different seed yields a different split.
    """
    num_samples = 40
    # Drawn from 1..100 so it can never equal the seed 0 used for the
    # "different seed" comparison below.
    seed = random.randint(1, 100)
    test_size = 0.714
    num_train = int(num_samples * (1 - test_size))
    num_test = num_samples - num_train

    a = np.random.random((num_samples, 3, 5))
    b = np.random.random((num_samples, 6, 9))

    train_a, test_a, train_b, test_b = train_test_split(
        a, b, test_size=test_size, seed=seed)

    assert train_a.shape == (num_train,) + a.shape[1:]
    assert train_b.shape == (num_train,) + b.shape[1:]
    assert len(train_a) == len(train_b)

    assert test_a.shape == (num_test,) + a.shape[1:]
    assert test_b.shape == (num_test,) + b.shape[1:]
    assert len(test_a) == len(test_b)

    reference = (train_a, train_b, test_a, test_b)

    # Same seed: every part must be reproduced exactly.
    again_train_a, again_test_a, again_train_b, again_test_b = train_test_split(
        a, b, test_size=test_size, seed=seed)
    repeated = (again_train_a, again_train_b, again_test_a, again_test_b)
    for expected, actual in zip(reference, repeated):
        assert np.all(expected == actual)

    # Different seed: no part may be identical to the reference split.
    other_train_a, other_test_a, other_train_b, other_test_b = train_test_split(
        a, b, test_size=test_size, seed=0)
    reseeded = (other_train_a, other_train_b, other_test_a, other_test_b)
    for expected, actual in zip(reference, reseeded):
        assert not np.all(expected == actual)
Exemple #3
0
    def files(self):
        """Return the image files belonging to the current subset.

        When ``self.validation_size`` is not greater than zero, everything
        returned by ``self._all_files()`` is handed back unchanged. Otherwise
        the files are divided with ``train_test_split`` (``seed=1``) and the
        train part is returned for the ``"train"`` subset, the test part for
        any other subset.
        """
        image_files = self._all_files()

        # No validation share requested: nothing to split.
        if not (self.validation_size > 0):
            return image_files

        train_part, test_part = train_test_split(
            image_files, test_size=self.validation_size, seed=1)

        return train_part if self.subset == "train" else test_part
Exemple #4
0
    def files_and_annotations(self):
        """Return the image files and matching annotation files of the subset.

        All ``*.jpg`` files directly under ``self.image_dir`` are collected and
        divided with ``train_test_split`` (10% test, ``seed=1``). The train
        part is used for the ``"train"`` subset, the test part otherwise.

        Each annotation path is derived from its image path by swapping the
        leading ``self.image_dir`` for ``self.annotation_dir`` and replacing
        the file extension with ``.png``.

        Returns:
            A ``(image_files, label_files)`` pair of path lists.
        """
        import os  # local import: only needed for the extension swap below

        split_rate = 0.1
        all_image_files = glob.glob(self.image_dir + "/*.jpg")
        train_image_files, test_image_files = train_test_split(all_image_files, test_size=split_rate, seed=1)

        image_files = train_image_files if self.subset == "train" else test_image_files

        # Replace only the first occurrence of the image directory and only the
        # real extension. A plain str.replace("jpg", "png") would also rewrite
        # "jpg" inside directory or file names (e.g. "jpg_set/jpg_01.jpg").
        label_files = [
            os.path.splitext(image_path.replace(self.image_dir, self.annotation_dir, 1))[0] + ".png"
            for image_path in image_files
        ]

        print("files and annotations are ready")
        return image_files, label_files
Exemple #5
0
    def files_and_annotations(self):
        """Return the files and annotations belonging to the current subset.

        The full lists come from ``self._files_and_annotations()``. When
        ``self.validation_size`` is greater than zero they are divided with
        ``train_test_split`` (``seed=1``): the ``"train"`` subset receives the
        train part and the ``"validation"`` subset the test part. In every
        other case the full, unsplit lists are returned.

        Returns:
            A ``(files, annotations)`` tuple.
        """
        files, annotations = self._files_and_annotations()

        if self.validation_size > 0:
            train_files, test_files, train_annotations, test_annotations = train_test_split(
                files, annotations, test_size=self.validation_size, seed=1)

            if self.subset == "train":
                files, annotations = train_files, train_annotations
            elif self.subset == "validation":
                files, annotations = test_files, test_annotations
            # Any other subset value keeps the full lists.

        return files, annotations
Exemple #6
0
    def _files_and_annotations(self):
        """Return the files and labels belonging to the current subset.

        Files and labels are read via
        ``self._files_and_annotations_from_json(self.json)`` and divided with
        ``train_test_split`` (10% test, ``seed=1``). The train part is returned
        for the ``"train"`` subset, the test part for any other subset.

        Returns:
            A ``(files, labels)`` tuple.
        """
        test_rate = 0.1
        all_files, all_labels = self._files_and_annotations_from_json(self.json)

        train_files, test_files, train_labels, test_labels = train_test_split(
            all_files, all_labels, test_size=test_rate, seed=1)

        if self.subset == "train":
            selected = (train_files, train_labels)
        else:
            selected = (test_files, test_labels)

        print("files and annotations are ready")
        return selected
Exemple #7
0
    def files_and_annotations(self):
        """Return the image and annotation file lists of the current subset.

        The listing is read from ``train.txt`` when the subset is ``"train"``
        or when ``self.validation_size`` is greater than zero, and from
        ``val.txt`` otherwise. Each line of the listing holds two
        whitespace-separated paths (image, label) relative to
        ``self.data_dir``.

        If there is no test dataset (``self.validation_size > 0``), the
        listing is split into train and test lists with that ratio
        (``seed=1``) and the part matching the subset is kept. The resulting
        lists are passed through ``shuffle`` before being returned.

        Returns:
            A ``(image_files, label_files)`` pair as returned by ``shuffle``.
        """
        if self.subset == "train" or self.validation_size > 0:
            text = "train.txt"
        else:
            text = "val.txt"

        filename = os.path.join(self.data_dir, text)
        # sep=r"\s+" is the documented equivalent of the deprecated
        # delim_whitespace=True.
        df = pd.read_csv(
            filename,
            sep=r"\s+",
            header=None,
            names=['image_files', 'label_files'],
        )

        image_files = [
            os.path.join(self.data_dir, relative_path)
            for relative_path in df.image_files.tolist()
        ]
        label_files = [
            os.path.join(self.data_dir, relative_path)
            for relative_path in df.label_files.tolist()
        ]

        if self.validation_size > 0:
            train_image_files, test_image_files, train_label_files, test_label_files = \
                train_test_split(image_files, label_files, test_size=self.validation_size, seed=1)
            if self.subset == "train":
                image_files = train_image_files
                label_files = train_label_files
            else:
                image_files = test_image_files
                label_files = test_label_files

        image_files, label_files = shuffle(image_files, label_files)
        print("files and annotations are ready")

        return image_files, label_files
Exemple #8
0
    def files_and_annotations(self):
        """Return the files and annotations belonging to the current subset.

        The full lists are loaded with ``self._files_and_annotations`` from
        ``self.path['json']`` and ``self.path['dir']``. When
        ``self.validation_size`` is not greater than zero they are returned
        as-is. Otherwise they are divided with ``train_test_split``
        (``seed=1``); the train part is returned for the ``"train"`` subset
        and the test part for any other subset.

        Returns:
            A ``(files, annotations)`` tuple.
        """
        json_path = self.path['json']
        image_dir = self.path['dir']
        all_files, all_annotations = self._files_and_annotations(json_path, image_dir)

        # No validation share requested: use everything.
        if not (self.validation_size > 0):
            return all_files, all_annotations

        train_files, test_files, train_annotations, test_annotations = train_test_split(
            all_files, all_annotations, test_size=self.validation_size, seed=1)

        if self.subset == "train":
            return train_files, train_annotations
        return test_files, test_annotations
Exemple #9
0
    def files_and_annotations(self):
        """Return the image files and matching annotation files of the subset.

        Files directly under ``self.image_dir`` ending in ``.jpg``, ``.png``,
        ``.JPG`` or ``.jpeg`` are collected (in that order) and divided with
        ``train_test_split`` (20% test, ``seed=1``). The train part is used for
        the ``"train"`` subset, the test part otherwise.

        Each annotation path is derived from its image path by swapping the
        leading ``self.image_dir`` for ``self.annotation_dir`` and replacing
        the file extension with ``.png``.

        Returns:
            A ``(image_files, label_files)`` pair of path lists.
        """
        import os  # local import: only needed for the extension swap below

        split_rate = 0.2
        all_image_files = []
        for pattern in ("/*.jpg", "/*.png", "/*.JPG", "/*.jpeg"):
            all_image_files.extend(glob.glob(self.image_dir + pattern))

        train_image_files, test_image_files = train_test_split(all_image_files, test_size=split_rate, seed=1)

        image_files = train_image_files if self.subset == "train" else test_image_files

        # Replace only the first occurrence of the image directory and only the
        # real extension. Chained str.replace("jpg"/"JPG"/"jpeg", "png") would
        # also rewrite those substrings inside directory or file names.
        label_files = [
            os.path.splitext(image_path.replace(self.image_dir, self.annotation_dir, 1))[0] + ".png"
            for image_path in image_files
        ]

        print("files and annotations are ready", self.subset, len(image_files), len(label_files))
        return image_files, label_files