Esempio n. 1
0
    def test__validate_samples(self):
        """Check ``_validate_samples`` on a clean and on a corrupted samples file.

        First pass: with every image id marked valid, the samples currently
        held by ``dp`` must equal the content of ``TEST_STR_FILE``, nothing is
        reported as invalid and the per-class counts are 3 x 'left', 1 x 'right'.

        Second pass: samples are loaded from ``TEST_STR_FILE_CORRUPTED`` and
        only the three well-formed entries must remain after validation.
        """
        global dp
        reference_samples = load_json(TEST_STR_FILE)
        all_image_ids = [
            'helmet_1.jpg', 'helmet_2.jpg', 'helmet_3.jpg', 'image_png.png'
        ]

        dp.min_class_size = 1
        dp.valid_image_ids = all_image_ids
        dp._validate_samples()

        assert dp.samples == reference_samples
        assert dp.invalid_samples == []
        assert dp.samples_count == {'left': 3, 'right': 1}

        # NOTE(review): presumably the corrupted file starts with malformed
        # entries that validation has to drop - confirm against the fixture.
        surviving = (
            ('helmet_1.jpg', 'left'),
            ('helmet_2.jpg', 'left'),
            ('image_png.png', 'right'),
        )
        remaining_samples = [
            {'image_id': image_id, 'label': label}
            for image_id, label in surviving
        ]

        dp.valid_image_ids = [image_id for image_id, _ in surviving]
        dp.samples = load_json(TEST_STR_FILE_CORRUPTED)
        dp._validate_samples()

        assert dp.samples == remaining_samples
Esempio n. 2
0
    def __init__(self,
                 image_dir: str,
                 job_dir: str,
                 batch_size: int = BATCH_SIZE,
                 base_model_name: str = BASE_MODEL_NAME,
                 **kwargs) -> None:
        """Set up the evaluation component for a finished job.

        Resolves both directories, creates a logger bound to the job
        directory and reads ``test_samples.json`` and ``class_mapping.json``
        from it. From those it derives the number of classes, the ordered
        class names and the ground-truth label array, then delegates to
        ``_determine_plot_params``, ``_load_best_model`` and
        ``_create_evaluation_dir``.

        Args:
            image_dir: Directory containing the images.
            job_dir: Directory containing the job's JSON artefacts.
            batch_size: Batch size stored on the instance.
            base_model_name: Name of the base model stored on the instance.
            **kwargs: Accepted for call compatibility; not used here.
        """
        self.image_dir = Path(image_dir).resolve()
        self.job_dir = Path(job_dir).resolve()
        self.batch_size = batch_size
        self.base_model_name = base_model_name

        self.logger = get_logger(__name__, self.job_dir)

        test_samples_path = self.job_dir / 'test_samples.json'
        self.samples_test: list = load_json(test_samples_path)  # type: ignore

        class_mapping_path = self.job_dir / 'class_mapping.json'
        self.class_mapping: dict = load_json(class_mapping_path)  # type: ignore

        self.n_classes = len(self.class_mapping)
        # The mapping is looked up with stringified indices 0..n_classes-1.
        self.classes = [
            str(self.class_mapping[str(class_idx)])
            for class_idx in range(self.n_classes)
        ]
        self.y_true = np.array(
            [sample['label'] for sample in self.samples_test])

        self._determine_plot_params()
        self._load_best_model()
        self._create_evaluation_dir()
Esempio n. 3
0
    def __init__(
        self,
        image_dir: str,
        job_dir: str,
        epochs_train_dense: typing.Union[int, str] = EPOCHS_TRAIN_DENSE,
        epochs_train_all: typing.Union[int, str] = EPOCHS_TRAIN_ALL,
        learning_rate_dense: typing.Union[float, str] = LEARNING_RATE_DENSE,
        learning_rate_all: typing.Union[float, str] = LEARNING_RATE_ALL,
        batch_size: typing.Union[int, str] = BATCH_SIZE,
        dropout_rate: typing.Union[float, str] = DROPOUT_RATE,
        base_model_name: str = BASE_MODEL_NAME,
        loss: str = LOSS,
        **kwargs,
    ) -> None:
        """Set up a training run from a prepared job directory.

        Resolves the image and job directories, creates a logger bound to the
        job directory and loads ``train_samples.json``, ``val_samples.json``
        and ``class_mapping.json`` from it. Numeric hyperparameters may arrive
        as strings and are coerced with ``int``/``float``. The result of
        ``use_multiprocessing()`` is stored as ``use_multiprocessing`` and
        ``workers``.

        Args:
            image_dir: Directory containing the images.
            job_dir: Directory containing the job's JSON artefacts.
            epochs_train_dense: Stored as ``int``.
            epochs_train_all: Stored as ``int``.
            learning_rate_dense: Stored as ``float``.
            learning_rate_all: Stored as ``float``.
            batch_size: Stored as ``int``.
            dropout_rate: Stored as ``float``.
            base_model_name: Name of the base model, stored as given.
            loss: Name of the loss, stored as given.
            **kwargs: Accepted for call compatibility; not used here.
        """
        self.image_dir = Path(image_dir).resolve()
        self.job_dir = Path(job_dir).resolve()

        self.logger = get_logger(__name__, self.job_dir)

        train_samples_path = self.job_dir / 'train_samples.json'
        val_samples_path = self.job_dir / 'val_samples.json'
        class_mapping_path = self.job_dir / 'class_mapping.json'
        self.samples_train = load_json(train_samples_path)
        self.samples_val = load_json(val_samples_path)
        self.class_mapping = load_json(class_mapping_path)
        self.n_classes = len(self.class_mapping)

        # Coerce values that may have been passed as strings.
        self.epochs_train_dense = int(epochs_train_dense)
        self.epochs_train_all = int(epochs_train_all)
        self.learning_rate_dense = float(learning_rate_dense)
        self.learning_rate_all = float(learning_rate_all)
        self.batch_size = int(batch_size)
        self.dropout_rate = float(dropout_rate)

        self.base_model_name = base_model_name
        self.loss = loss

        multiprocessing_flag, worker_count = use_multiprocessing()
        self.use_multiprocessing = multiprocessing_flag
        self.workers = worker_count
Esempio n. 4
0
    def __init__(self,
                 job_dir: str,
                 image_dir: str,
                 samples_file: str,
                 min_class_size: int = MIN_CLASS_SIZE,
                 test_size: float = TEST_SIZE,
                 val_size: float = VALIDATION_SIZE,
                 part_size: float = PART_SIZE,
                 **kwargs) -> None:
        """Inits data preparation component.

        Creates the job directory if needed, loads the samples file and
        initializes the variables used by further operations:
        *valid_image_ids*, *class_mapping*, *train_samples*, *val_samples*,
        *test_samples*.

        Args:
            job_dir: Job directory; resolved to an absolute path and created
                (including parents) when it does not exist yet.
            image_dir: Directory containing the images.
            samples_file: Path of the JSON file holding the samples.
            min_class_size: Stored on the instance as given.
            test_size: Stored on the instance as given.
            val_size: Stored on the instance as given.
            part_size: Stored on the instance as given.
            **kwargs: Accepted for call compatibility; not used here.
        """
        self.job_dir = Path(job_dir).resolve()
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        self.job_dir.mkdir(parents=True, exist_ok=True)

        self.image_dir = Path(image_dir)
        self.samples_file = Path(samples_file)
        self.min_class_size = min_class_size
        self.test_size = test_size
        self.val_size = val_size
        self.part_size = part_size

        # Populated by later preparation steps.
        self.class_mapping: Optional[dict] = None
        self.valid_image_ids: Optional[List[str]] = None
        self.train_samples = None
        self.val_samples = None
        self.test_samples = None

        self.logger = get_logger(__name__, self.job_dir)
        self.samples = load_json(self.samples_file)
Esempio n. 5
0
    def test_split_samples_half_2(self):
        """Check ``_split_samples`` with test=0.2, val=0.4 and part_size=2/3.

        Verifies the derived train size, that the three fractions sum to one,
        the size of every split and the per-label counts inside each split.
        """
        global dp
        dp.samples = load_json(TEST_SPLIT_FILE)
        dp.test_size = 0.2
        dp.val_size = 0.4
        dp.part_size = 2 / 3

        dp._split_samples()

        npt.assert_almost_equal(dp.train_size, 0.4)
        # Fractions are floats: compare with a tolerance rather than `==`,
        # which can fail on rounding error alone.
        npt.assert_almost_equal(
            dp.test_size + dp.val_size + dp.train_size, 1.0)

        assert len(dp.test_samples) == 27
        assert len(dp.val_samples) == 53
        assert len(dp.train_samples) == 53

        train_labels_count = Counter(s['label'] for s in dp.train_samples)
        val_labels_count = Counter(s['label'] for s in dp.val_samples)
        test_labels_count = Counter(s['label'] for s in dp.test_samples)

        assert test_labels_count[1] == 14
        assert test_labels_count[2] == 8
        assert test_labels_count[3] == 5

        assert val_labels_count[1] == 26
        assert val_labels_count[2] == 16
        assert val_labels_count[3] == 11

        assert train_labels_count[1] == 27
        assert train_labels_count[2] == 16
        assert train_labels_count[3] == 10
Esempio n. 6
0
    def test_split_samples_half(self):
        """Check ``_split_samples`` with test=0.2, val=0.5 and part_size=1/2.

        Verifies the derived train size, that the three fractions sum to one,
        the size of every split and the per-label counts inside each split.
        """
        global dp
        dp.samples = load_json(TEST_SPLIT_FILE)
        dp.test_size = 0.2
        dp.val_size = 0.5
        dp.part_size = 1 / 2

        dp._split_samples()

        npt.assert_almost_equal(dp.train_size, 0.3)
        # Fractions are floats: compare with a tolerance rather than `==`,
        # which can fail on rounding error alone.
        npt.assert_almost_equal(
            dp.test_size + dp.val_size + dp.train_size, 1.0)

        assert len(dp.test_samples) == 20
        assert len(dp.val_samples) == 50
        assert len(dp.train_samples) == 30

        train_labels_count = Counter(s['label'] for s in dp.train_samples)
        val_labels_count = Counter(s['label'] for s in dp.val_samples)
        test_labels_count = Counter(s['label'] for s in dp.test_samples)

        assert test_labels_count[1] == 10
        assert test_labels_count[2] == 6
        assert test_labels_count[3] == 4

        assert val_labels_count[1] == 25
        assert val_labels_count[2] == 15
        assert val_labels_count[3] == 10

        assert train_labels_count[1] == 15
        assert train_labels_count[2] == 9
        assert train_labels_count[3] == 6
Esempio n. 7
0
    def test_split_samples_full_2(self):
        """Check ``_split_samples`` with test=0.2, val=0.1 and part_size=1.0.

        Verifies the derived train size, that the three fractions sum to one,
        the size of every split and the per-label counts inside each split.
        """
        global dp
        dp.samples = load_json(TEST_SPLIT_FILE)
        dp.test_size = 0.2
        dp.val_size = 0.1
        dp.part_size = 1.0

        dp._split_samples()

        npt.assert_almost_equal(dp.train_size, 0.7)
        # Fractions are floats: compare with a tolerance rather than `==`,
        # which can fail on rounding error alone.
        npt.assert_almost_equal(
            dp.test_size + dp.val_size + dp.train_size, 1.0)

        assert len(dp.test_samples) == 40
        assert len(dp.val_samples) == 20
        assert len(dp.train_samples) == 140

        train_labels_count = Counter(s['label'] for s in dp.train_samples)
        val_labels_count = Counter(s['label'] for s in dp.val_samples)
        test_labels_count = Counter(s['label'] for s in dp.test_samples)

        assert test_labels_count[1] == 20
        assert test_labels_count[2] == 12
        assert test_labels_count[3] == 8

        assert val_labels_count[1] == 10
        assert val_labels_count[2] == 6
        assert val_labels_count[3] == 4

        assert train_labels_count[1] == 70
        assert train_labels_count[2] == 42
        assert train_labels_count[3] == 28
Esempio n. 8
0
    def test__validate_samples_2(self, mocker):
        """Check the log output of ``_validate_samples`` when samples are dropped.

        ``logging.Logger.info`` is patched, only image ids 1-4 are marked
        valid, and the recorded calls must contain the banner, the list of
        dropped samples, the summary note and the class distribution, in
        that order.
        """
        global dp
        info_mock = mocker.patch('logging.Logger.info')

        dp.valid_image_ids = ['1.jpg', '2.jpg', '3.jpg', '4.jpg']
        dp.samples = load_json(TEST_INT_FILE)
        dp._validate_samples()

        banner_calls = [
            call('\n****** Running samples validation ******\n'),
            call('The following samples were dropped:'),
        ]
        dropped_sample_calls = [
            call("- {'image_id': '5.jpg', 'label': 1}"),
            call("- {'image_id': '6.jpg', 'label': 1}"),
            call("- {'image_id': '7.jpg', 'label': 2}"),
            call("- {'image_id': '8.jpg', 'label': 1}"),
            call("- {'image_id': '9.jpg', 'label': 1}"),
            call("- {'image_id': '10.jpg', 'label': 2}"),
            call("- {'image_id': '11.jpg', 'label': 1}"),
            call("- {'image_id': '12.jpg', 'label': 1}"),
            call("- {'image_id': '13.jpg', 'label': 1}"),
            call("- {'image_id': '14.jpg', 'label': 2}"),
        ]
        summary_calls = [
            call('NOTE: 26 samples were identified as invalid.\n'
                 'The full list of invalid samples will be saved in job dir.\n'
                 ),
            call('Class distribution after validation:'),
            call('1: 2 (50.0%)'),
            call('2: 2 (50.0%)'),
        ]

        info_mock.assert_has_calls(
            banner_calls + dropped_sample_calls + summary_calls)
Esempio n. 9
0
    def test_save_json(self):
        """Check that ``save_json`` writes a file that ``load_json`` reads back.

        The target file must not exist beforehand. It is removed afterwards
        even when an assertion fails, so that a failing run does not break
        the precondition of the next one.
        """
        data = [
            {'image_id': 'helmet_1.jpg', 'label': 'left'},
            {'image_id': 'helmet_2.jpg', 'label': 'left'},
            {'image_id': 'image_png.png', 'label': 'right'},
        ]

        target_file = TEST_TARGET_FILE
        assert not target_file.exists()
        try:
            save_json(data, target_file)
            assert target_file.exists()
            assert load_json(target_file) == data
        finally:
            # Guarded so a failure inside save_json is not masked by a
            # FileNotFoundError from unlink().
            if target_file.exists():
                target_file.unlink()
Esempio n. 10
0
    def test__create_class_mapping(self):
        """Check ``_create_class_mapping`` for string and integer labels.

        For each fixture the mapping is built twice, once with the samples in
        file order and once reversed; both runs must yield the same mapping.
        """
        global dp
        cases = [
            (TEST_STR_FILE, {'left': 3, 'right': 1}, {0: 'left', 1: 'right'}),
            (TEST_INT_FILE, {1: 10, 2: 20}, {0: 1, 1: 2}),
        ]

        for samples_file, samples_count, expected in cases:
            dp.samples = load_json(samples_file)
            dp.samples_count = samples_count
            dp._create_class_mapping()
            assert dp.class_mapping == expected

            # Same samples in reverse order must give the same mapping.
            dp.samples = dp.samples[::-1]
            dp._create_class_mapping()
            assert dp.class_mapping == expected
Esempio n. 11
0
    def test__apply_class_mapping(self):
        """Check that ``_apply_class_mapping`` replaces labels by class indices.

        Covers the string-label fixture, samples carrying an extra key (which
        is expected to be absent from the result) and a negative check that
        the mapping is not applied the wrong way round.
        """
        global dp
        dp.samples = load_json(TEST_STR_FILE)
        dp.class_mapping = {0: 'left', 1: 'right'}
        dp._apply_class_mapping()
        expected = load_json(TEST_FILE_STR2INT)

        assert dp.samples == expected

        # Extra keys ('test') must not survive the mapping.
        dp.samples = [
            {
                'image_id': 'helmet_2.jpg',
                'label': 'left'
            },
            {
                'image_id': 'image_png.png',
                'label': 'right',
                'test': 'abc'
            },
        ]
        expected = [
            {
                'image_id': 'helmet_2.jpg',
                'label': 0
            },
            {
                'image_id': 'image_png.png',
                'label': 1
            },
        ]

        dp._apply_class_mapping()

        assert dp.samples == expected

        # Negative check: the result must not carry swapped indices.
        dp.samples = [
            {
                'image_id': 'helmet_2.jpg',
                'label': 'left'
            },
            {
                'image_id': 'image_png.png',
                'label': 'right'
            },
        ]
        swapped = [
            {
                'image_id': 'helmet_2.jpg',
                'label': 1
            },
            {
                'image_id': 'image_png.png',
                'label': 0
            },
        ]

        # The mapping has to be applied here; otherwise the assertion only
        # compares two unrelated literals and can never fail.
        dp._apply_class_mapping()

        assert dp.samples != swapped

        # NOTE(review): this stanza compares two identical literals without
        # invoking the method. Presumably a sanity check; confirm whether a
        # call with already-mapped integer labels was intended.
        dp.samples = [
            {
                'image_id': 'helmet_2.jpg',
                'label': 1
            },
            {
                'image_id': 'image_png.png',
                'label': 0
            },
        ]
        expected = [
            {
                'image_id': 'helmet_2.jpg',
                'label': 1
            },
            {
                'image_id': 'image_png.png',
                'label': 0
            },
        ]

        assert dp.samples == expected