예제 #1
0
def test_shuffle_diff_length():
    """Check that ``shuffle`` raises when its inputs have different lengths.

    Two cases are exercised: an array with 40 rows paired with an array with
    20 rows, and the same 40-row array paired with a list of 15 elements.
    """
    a_shape = (40, 3, 5)
    a = np.random.random(a_shape)

    b_shape = (20, 6, 9)
    b = np.random.random(b_shape)

    # The return value is deliberately not unpacked: a failing tuple unpacking
    # raises on its own and could hide a missing length check in ``shuffle``.
    with pytest.raises(Exception):
        shuffle(a, b)

    c = list(range(15))

    with pytest.raises(Exception):
        shuffle(a, c)
예제 #2
0
    def _images_and_labels(self):
        """Build the image and label arrays for ``self.subset``.

        The batch files are read through ``self._load_data`` and concatenated
        along axis 0.  When ``self.train_validation_saving_size`` is positive,
        the concatenated data is split that many entries from the end:
        "train" keeps the leading part and "train_validation_saving" keeps
        the trailing part.  Only the "train" subset is shuffled, always with
        seed 0.

        Returns:
            tuple: ``(images, labels)``.
        """
        if self.subset in ("train", "train_validation_saving"):
            batch_names = [
                "data_batch_1",
                "data_batch_2",
                "data_batch_3",
                "data_batch_4",
                "data_batch_5",
            ]
        else:
            batch_names = ["test_batch"]

        loaded = [self._load_data(name) for name in batch_names]
        images = np.concatenate(
            [batch_images for batch_images, _ in loaded], axis=0)
        labels = np.concatenate(
            [batch_labels for _, batch_labels in loaded], axis=0)

        holdout_size = self.train_validation_saving_size
        if holdout_size > 0:
            # Split the train set into train and train_validation_saving.
            cut = [-holdout_size]
            if self.subset == "train":
                images = np.split(images, cut, axis=0)[0]
                labels = np.split(labels, cut, axis=0)[0]
            elif self.subset == "train_validation_saving":
                images = np.split(images, cut, axis=0)[1]
                labels = np.split(labels, cut, axis=0)[1]

        # Randomize the training data only.
        if self.subset == "train":
            images, labels = shuffle(images, labels, seed=0)
        return images, labels
예제 #3
0
    def files_and_annotations(self):
        """Return the shuffled file and label lists for ``self.subset``.

        The single and multi collections are each split into train and test
        parts with ``train_test_split`` (test_size 0.1, seed 1).  "train"
        takes the train parts and any other subset takes the test parts; the
        multi entries are placed before the single entries, and the combined
        lists are then shuffled together with seed 1.
        """
        single_split_rate = 0.1
        multi_split_rate = 0.1

        single_files, single_labels = self._single_files_and_annotations()
        multi_files, multi_labels = self._multi_files_and_annotations()

        (single_train_files, single_test_files,
         single_train_labels, single_test_labels) = train_test_split(
            single_files,
            single_labels,
            test_size=single_split_rate,
            seed=1,
        )

        (multi_train_files, multi_test_files,
         multi_train_labels, multi_test_labels) = train_test_split(
            multi_files,
            multi_labels,
            test_size=multi_split_rate,
            seed=1,
        )

        if self.subset == "train":
            chosen_files = multi_train_files + single_train_files
            chosen_labels = multi_train_labels + single_train_labels
        else:
            chosen_files = multi_test_files + single_test_files
            chosen_labels = multi_test_labels + single_test_labels

        files, labels = shuffle(chosen_files, chosen_labels, seed=1)

        print("files and annotations are ready")
        return files, labels
예제 #4
0
    def files_and_annotations(self):
        """Return the shuffled image file and label file lists.

        The list file ("train.txt" or "val.txt" under ``self.data_dir``) is
        read as a whitespace-separated table with two columns: image path and
        label path.  Every "/SegNet/CamVid" occurrence in those paths is
        replaced with ``self.data_dir``.

        Returns:
            tuple: ``(image_files, label_files)``, shuffled together.

        Raises:
            ValueError: If ``self.subset`` is neither "train" nor
                "validation".
        """
        if self.subset == "train":
            text = "train.txt"
        elif self.subset == "validation":
            text = "val.txt"
        else:
            # Fail with a clear message instead of an unbound local variable.
            raise ValueError(
                "subset must be 'train' or 'validation', got {!r}".format(
                    self.subset))

        filename = os.path.join(self.data_dir, text)
        # sep=r"\s+" is the documented replacement for the deprecated
        # delim_whitespace=True.
        df = pd.read_csv(
            filename,
            sep=r"\s+",
            header=None,
            names=['image_files', 'label_files'],
        )

        image_files = [
            path.replace("/SegNet/CamVid", self.data_dir)
            for path in df.image_files.tolist()
        ]
        label_files = [
            path.replace("/SegNet/CamVid", self.data_dir)
            for path in df.label_files.tolist()
        ]

        image_files, label_files = shuffle(image_files, label_files)
        print("files and annotations are ready")

        return image_files, label_files
예제 #5
0
 def _shuffle(self):
     """Reshuffle the feed indices when this is a shuffled train subset.

     Does nothing unless ``self.subset`` is "train" and ``self.is_shuffle``
     is truthy.  Otherwise ``self._feed_indices`` is replaced with a shuffle
     of ``range(self.num_per_epoch)`` and ``self.seed`` is advanced by one so
     the next call uses a different seed.
     """
     if self.subset != "train" or not self.is_shuffle:
         return

     current_seed = self.seed
     self._feed_indices = shuffle(
         range(self.num_per_epoch), seed=current_seed)
     message = "Shuffle {} train dataset with random state {}.".format(
         self.__class__.__name__, current_seed)
     print(message)
     self.seed = current_seed + 1
예제 #6
0
    def _shuffle(self):
        """Shuffle files and annotations together for the training subsets.

        Applies only when ``self.subset`` is "train" or "train_validation".
        ``self.seed`` is advanced by one after each shuffle.
        """
        if self.subset not in ("train", "train_validation"):
            return

        current_seed = self.seed
        self.files, self.annotations = shuffle(
            self.files, self.annotations, seed=current_seed)
        message = "Shuffle {} train dataset with random state {}.".format(
            self.__class__.__name__, current_seed)
        print(message)
        self.seed = current_seed + 1
예제 #7
0
    def get_shuffle_index(self):
        """Return a shuffled list of indices, one per image.

        The number of indices is the length of the images returned by
        ``self._images_and_labels()``.  ``self.seed`` is advanced by one
        after the shuffle.
        """
        all_images, _ = self._images_and_labels()
        sample_count = len(all_images)

        current_seed = self.seed
        shuffled_indices = shuffle(range(sample_count), seed=current_seed)
        message = "Shuffle {} train dataset with random state {}.".format(
            self.__class__.__name__, current_seed)
        print(message)
        self.seed = current_seed + 1

        return shuffled_indices
예제 #8
0
    def indices(self):
        """Return the sample indices, computing and caching them on first use.

        The result is stored in ``self._indices`` and reused afterwards.  For
        the "train" subset with ``self.is_shuffle`` set, the indices are a
        shuffle of ``range(self.num_per_epoch)`` seeded with ``self.seed``;
        otherwise they are the plain ordered list.
        """
        if hasattr(self, "_indices"):
            return self._indices

        index_range = range(self.num_per_epoch)
        if self.subset == "train" and self.is_shuffle:
            self._indices = shuffle(index_range, seed=self.seed)
        else:
            self._indices = list(index_range)

        return self._indices
예제 #9
0
def test_shuffle_range():
    """Check that ``shuffle`` accepts a ``range`` and returns a reordered list."""
    range_list = range(40)

    seed = random.randint(1, 100)

    result = shuffle(range_list, seed=seed)

    assert len(result) == len(range_list)
    assert isinstance(result, list)
    # Compare against a list: a list never equals a range object, so comparing
    # with ``range_list`` directly would pass even if nothing was shuffled.
    assert result != list(range_list)
예제 #10
0
파일: bdd100k.py 프로젝트: tkng/blueoil
    def _shuffle(self):
        """Shuffle paths and bounding boxes together for the train subset.

        Does nothing when ``self.is_shuffle`` is falsy or ``self.subset`` is
        not "train".  ``self.seed`` is advanced by one after each shuffle.
        """
        if not (self.is_shuffle and self.subset == "train"):
            return

        current_seed = self.seed
        self.paths, self.bboxs = shuffle(
            self.paths, self.bboxs, seed=current_seed)
        message = "Shuffle {} train dataset with seed {}.".format(
            self.__class__.__name__, current_seed)
        print(message)
        self.seed = current_seed + 1
예제 #11
0
def test_shuffle():
    """Check ``shuffle`` on two arrays and a list of equal length.

    Verifies that shapes and lengths are preserved, that the order changes,
    that the same seed reproduces the same result, and that ``seed=None``
    gives a result different from the seeded one.
    """
    num_samples = 40

    a_shape = (num_samples, 3, 5)
    a = np.random.random(a_shape)

    b_shape = (num_samples, 6, 9)
    b = np.random.random(b_shape)

    c = list(range(num_samples))

    seed = random.randint(1, 100)

    result_a, result_b, result_c = shuffle(a, b, c, seed=seed)

    assert result_a.shape == a_shape
    assert result_b.shape == b_shape
    assert len(c) == len(result_c)
    assert not np.all(a == result_a)
    assert not np.all(b == result_b)
    assert c != result_c

    same_seed_a, same_seed_b, same_seed_c = shuffle(a, b, c, seed=seed)
    assert same_seed_a.shape == a_shape
    assert same_seed_b.shape == b_shape
    assert len(same_seed_c) == len(c)
    assert np.all(same_seed_a == result_a)
    assert np.all(same_seed_b == result_b)
    assert same_seed_c == result_c

    diff_seed_a, diff_seed_b, diff_seed_c = shuffle(a, b, c, seed=None)
    assert diff_seed_a.shape == a_shape
    assert diff_seed_b.shape == b_shape
    assert len(diff_seed_c) == len(c)
    assert not np.all(diff_seed_a == result_a)
    assert not np.all(diff_seed_b == result_b)
    # Check the unseeded list result against the seeded one, matching the
    # array checks above.
    assert diff_seed_c != result_c
예제 #12
0
파일: camvid.py 프로젝트: ki-lm/blueoil
    def files_and_annotations(self):
        """Return the shuffled image and annotation file lists.

        "train.txt" is read when ``self.subset`` is "train" or when
        ``self.validation_size`` is positive; otherwise "val.txt" is read.
        The list file is a whitespace-separated table with two columns
        (image path, label path), and each path is joined onto
        ``self.data_dir``.

        When ``self.validation_size`` is positive, the entries are split with
        ``train_test_split`` (seed 1): "train" keeps the train part and any
        other subset keeps the test part.

        Returns:
            tuple: ``(image_files, label_files)``, shuffled together.
        """
        if self.subset == "train" or self.validation_size > 0:
            text = "train.txt"
        else:
            text = "val.txt"

        filename = os.path.join(self.data_dir, text)
        # sep=r"\s+" is the documented replacement for the deprecated
        # delim_whitespace=True.
        df = pd.read_csv(
            filename,
            sep=r"\s+",
            header=None,
            names=['image_files', 'label_files'],
        )

        image_files = [
            os.path.join(self.data_dir, path)
            for path in df.image_files.tolist()
        ]
        label_files = [
            os.path.join(self.data_dir, path)
            for path in df.label_files.tolist()
        ]

        if self.validation_size > 0:
            train_image_files, test_image_files, train_label_files, test_label_files = \
                train_test_split(image_files, label_files, test_size=self.validation_size, seed=1)
            if self.subset == "train":
                image_files = train_image_files
                label_files = train_label_files
            else:
                image_files = test_image_files
                label_files = test_label_files

        image_files, label_files = shuffle(image_files, label_files)
        print("files and annotations are ready")

        return image_files, label_files