Ejemplo n.º 1
0
def test_train_test_split():
    """Check ``train_test_split`` partition sizes and its seed behaviour.

    Two random arrays sharing the same first dimension are split together.
    The test verifies the shapes of every returned part, that repeating the
    call with the same seed gives identical parts, and that seed 0 (never
    drawn here, since the random seed is taken from 1..100) gives different
    parts.
    """
    num_samples = 40
    test_size = 0.714
    seed = random.randint(1, 100)

    num_train = int(num_samples * (1 - test_size))
    num_test = num_samples - num_train

    a_shape = (num_samples, 3, 5)
    a = np.random.random(a_shape)
    b_shape = (num_samples, 6, 9)
    b = np.random.random(b_shape)

    train_a, test_a, train_b, test_b = train_test_split(a, b, test_size=test_size, seed=seed)

    # Each part keeps the trailing dimensions of its source array.
    for part_a, part_b, expected_len in (
        (train_a, train_b, num_train),
        (test_a, test_b, num_test),
    ):
        assert part_a.shape == (expected_len, 3, 5)
        assert part_b.shape == (expected_len, 6, 9)
        assert len(part_a) == len(part_b)

    # Same seed: every part must match element for element.
    rerun_train_a, rerun_test_a, rerun_train_b, rerun_test_b = \
        train_test_split(a, b, test_size=test_size, seed=seed)

    for first, rerun in (
        (train_a, rerun_train_a),
        (train_b, rerun_train_b),
        (test_a, rerun_test_a),
        (test_b, rerun_test_b),
    ):
        assert np.all(first == rerun)

    # Different seed: no part may be fully identical.
    other_train_a, other_test_a, other_train_b, other_test_b = \
        train_test_split(a, b, test_size=test_size, seed=0)

    for first, other in (
        (train_a, other_train_a),
        (train_b, other_train_b),
        (test_a, other_test_a),
        (test_b, other_test_b),
    ):
        assert not np.all(first == other)
Ejemplo n.º 2
0
    def files(self):
        """Return the image files for the configured subset.

        All files come from ``self._all_files()``. When
        ``self.validation_size`` is greater than 0 they are split with
        ``train_test_split`` (fixed ``seed=1``): the ``"train"`` subset gets
        the first returned part, any other subset gets the second. Otherwise
        the full list is returned unchanged.
        """
        every_file = self._all_files()

        # No validation fraction requested: nothing to split off.
        if not (self.validation_size > 0):
            return every_file

        train_part, held_out_part = train_test_split(
            every_file, test_size=self.validation_size, seed=1)

        return train_part if self.subset == "train" else held_out_part
Ejemplo n.º 3
0
    def files_and_annotations(self):
        """Return the files and annotations for the configured subset.

        Both lists come from ``self._files_and_annotations()``. When
        ``self.validation_size`` is greater than 0 they are split together
        with ``train_test_split`` (fixed ``seed=1``): subset ``"train"``
        receives the train parts and subset ``"validation"`` receives the
        test parts. For any other subset value, or when no validation size is
        set, the full unsplit lists are returned.

        Returns:
            tuple: ``(files, annotations)``.
        """
        files, annotations = self._files_and_annotations()

        if self.validation_size > 0:
            train_files, test_files, train_annotations, test_annotations = train_test_split(
                files, annotations, test_size=self.validation_size, seed=1)

            # The two subsets are mutually exclusive; any other subset value
            # deliberately falls through with the unsplit lists.
            if self.subset == "train":
                files, annotations = train_files, train_annotations
            elif self.subset == "validation":
                files, annotations = test_files, test_annotations

        return files, annotations
Ejemplo n.º 4
0
    def files_and_annotations(self):
        """Return image and annotation file lists for the configured subset.

        The listing file is ``train.txt`` when the subset is ``"train"`` or a
        validation size is set, otherwise ``val.txt``. It is read from
        ``self.data_dir`` as a headerless, whitespace-separated table whose
        two columns are the image path and the label path; both are joined
        onto ``self.data_dir``.

        When ``self.validation_size`` is greater than 0 the lists are split
        with ``train_test_split`` (fixed ``seed=1``): subset ``"train"`` gets
        the train parts and any other subset gets the test parts. The result
        is passed through ``shuffle`` before being returned.

        Returns:
            tuple: ``(image_files, label_files)``.
        """
        if self.subset == "train" or self.validation_size > 0:
            list_name = "train.txt"
        else:
            list_name = "val.txt"

        list_path = os.path.join(self.data_dir, list_name)
        # ``sep=r"\s+"`` is the documented equivalent of the deprecated
        # ``delim_whitespace=True`` option.
        df = pd.read_csv(
            list_path,
            sep=r"\s+",
            header=None,
            names=['image_files', 'label_files'],
        )

        image_files = [
            os.path.join(self.data_dir, relative_path)
            for relative_path in df.image_files.tolist()
        ]
        label_files = [
            os.path.join(self.data_dir, relative_path)
            for relative_path in df.label_files.tolist()
        ]

        if self.validation_size > 0:
            train_image_files, test_image_files, train_label_files, test_label_files = \
                train_test_split(image_files, label_files, test_size=self.validation_size, seed=1)
            if self.subset == "train":
                image_files, label_files = train_image_files, train_label_files
            else:
                image_files, label_files = test_image_files, test_label_files

        image_files, label_files = shuffle(image_files, label_files)
        print("files and annotations are ready")

        return image_files, label_files
Ejemplo n.º 5
0
    def files_and_annotations(self):
        """Return image and annotation file lists for the configured subset.

        The listing file is ``train.txt`` when the subset is ``"train"`` or a
        validation size is set, otherwise ``val.txt``. It is read from
        ``self.data_dir``; each non-blank line supplies an image path and a
        label path as its first two whitespace-separated fields, and both are
        joined onto ``self.data_dir``. Blank lines are skipped.

        When ``self.validation_size`` is greater than 0 the lists are split
        with ``train_test_split`` (fixed ``seed=1``): subset ``"train"`` gets
        the train parts and any other subset gets the test parts. The result
        is passed through ``shuffle`` before being returned.

        Returns:
            tuple: ``(image_files, label_files)``.

        Raises:
            IndexError: If a non-blank line has fewer than two fields.
        """
        if self.subset == "train" or self.validation_size > 0:
            list_name = "train.txt"
        else:
            list_name = "val.txt"

        list_path = os.path.join(self.data_dir, list_name)

        image_files, label_files = [], []
        with open(list_path) as f:
            for line in f:
                items = line.split()
                if not items:
                    # A blank line (e.g. a trailing newline at the end of the
                    # file) holds no entry; skip it rather than fail on items[0].
                    continue
                image_files.append(os.path.join(self.data_dir, items[0]))
                label_files.append(os.path.join(self.data_dir, items[1]))

        if self.validation_size > 0:
            train_image_files, test_image_files, train_label_files, test_label_files = \
                train_test_split(image_files, label_files, test_size=self.validation_size, seed=1)
            if self.subset == "train":
                image_files, label_files = train_image_files, train_label_files
            else:
                image_files, label_files = test_image_files, test_label_files

        image_files, label_files = shuffle(image_files, label_files)
        print("files and annotations are ready")

        return image_files, label_files
Ejemplo n.º 6
0
    def files_and_annotations(self):
        """Return the files and annotations for the configured subset.

        The complete lists come from ``self._files_and_annotations`` called
        with ``self.path['json']`` and ``self.path['dir']``. When
        ``self.validation_size`` is greater than 0 they are split with
        ``train_test_split`` (fixed ``seed=1``): subset ``"train"`` gets the
        train parts and any other subset gets the test parts. Otherwise the
        complete lists are returned.
        """
        every_file, every_annotation = self._files_and_annotations(
            self.path['json'], self.path['dir'])

        # No validation fraction requested: hand back everything.
        if not (self.validation_size > 0):
            return every_file, every_annotation

        train_files, test_files, train_annotations, test_annotations = train_test_split(
            every_file,
            every_annotation,
            test_size=self.validation_size,
            seed=1,
        )

        if self.subset == "train":
            return train_files, train_annotations
        return test_files, test_annotations