def test__validate_samples(self):
    """Validation keeps only samples whose image ids are listed as valid."""
    global dp
    want = load_json(TEST_STR_FILE)
    dp.min_class_size = 1
    dp.valid_image_ids = [
        'helmet_1.jpg', 'helmet_2.jpg', 'helmet_3.jpg', 'image_png.png'
    ]
    dp._validate_samples()
    assert dp.samples == want
    assert dp.invalid_samples == []
    assert dp.samples_count == {'left': 3, 'right': 1}

    # With the corrupted samples file, only these three entries survive
    # validation against the reduced valid_image_ids list.
    want = [
        {'image_id': 'helmet_1.jpg', 'label': 'left'},
        {'image_id': 'helmet_2.jpg', 'label': 'left'},
        {'image_id': 'image_png.png', 'label': 'right'},
    ]
    dp.valid_image_ids = ['helmet_1.jpg', 'helmet_2.jpg', 'image_png.png']
    dp.samples = load_json(TEST_STR_FILE_CORRUPTED)
    dp._validate_samples()
    assert dp.samples == want
def __init__(self,
             image_dir: str,
             job_dir: str,
             batch_size: int = BATCH_SIZE,
             base_model_name: str = BASE_MODEL_NAME,
             **kwargs) -> None:
    """Inits evaluation component.

    Loads the best model from job directory. Creates evaluation
    directory if app was started from commandline.
    """
    self.image_dir = Path(image_dir).resolve()
    self.job_dir = Path(job_dir).resolve()
    self.batch_size = batch_size
    self.base_model_name = base_model_name
    self.logger = get_logger(__name__, self.job_dir)

    # Artifacts read from the job directory.
    self.samples_test: list = load_json(
        self.job_dir / 'test_samples.json')  # type: ignore
    self.class_mapping: dict = load_json(
        self.job_dir / 'class_mapping.json')  # type: ignore
    self.n_classes = len(self.class_mapping)
    # Class names ordered by their integer index in the mapping
    # (mapping keys are stored as strings).
    self.classes = [
        str(self.class_mapping[str(index)])
        for index in range(self.n_classes)
    ]
    self.y_true = np.array(
        [sample['label'] for sample in self.samples_test])

    self._determine_plot_params()
    self._load_best_model()
    self._create_evaluation_dir()
def __init__(
    self,
    image_dir: str,
    job_dir: str,
    epochs_train_dense: typing.Union[int, str] = EPOCHS_TRAIN_DENSE,
    epochs_train_all: typing.Union[int, str] = EPOCHS_TRAIN_ALL,
    learning_rate_dense: typing.Union[float, str] = LEARNING_RATE_DENSE,
    learning_rate_all: typing.Union[float, str] = LEARNING_RATE_ALL,
    batch_size: typing.Union[int, str] = BATCH_SIZE,
    dropout_rate: typing.Union[float, str] = DROPOUT_RATE,
    base_model_name: str = BASE_MODEL_NAME,
    loss: str = LOSS,
    **kwargs,
) -> None:
    """Inits training component.

    Loads train/validation samples and the class mapping from the job
    directory. Numeric hyperparameters may arrive as strings and are
    coerced to ``int``/``float`` here.
    """
    self.image_dir = Path(image_dir).resolve()
    self.job_dir = Path(job_dir).resolve()
    self.logger = get_logger(__name__, self.job_dir)

    # Artifacts read from the job directory.
    self.samples_train = load_json(self.job_dir / 'train_samples.json')
    self.samples_val = load_json(self.job_dir / 'val_samples.json')
    self.class_mapping = load_json(self.job_dir / 'class_mapping.json')
    self.n_classes = len(self.class_mapping)

    # Coerce hyperparameters that may have been passed as strings.
    self.epochs_train_dense = int(epochs_train_dense)
    self.epochs_train_all = int(epochs_train_all)
    self.learning_rate_dense = float(learning_rate_dense)
    self.learning_rate_all = float(learning_rate_all)
    self.batch_size = int(batch_size)
    self.dropout_rate = float(dropout_rate)

    self.base_model_name = base_model_name
    self.loss = loss
    self.use_multiprocessing, self.workers = use_multiprocessing()
def __init__(self,
             job_dir: str,
             image_dir: str,
             samples_file: str,
             min_class_size: int = MIN_CLASS_SIZE,
             test_size: float = TEST_SIZE,
             val_size: float = VALIDATION_SIZE,
             part_size: float = PART_SIZE,
             **kwargs) -> None:
    """Inits data preparation component.

    Loads samples file. Initializes variables for further operations:
    *valid_image_ids*, *class_mapping*, *train_samples*, *val_samples*,
    *test_samples*.

    Args:
        job_dir: Directory for job artifacts; created if missing.
        image_dir: Directory containing the sample images.
        samples_file: Path to the JSON samples file to load.
        min_class_size: Minimum number of samples per class.
        test_size: Fraction of samples for the test split.
        val_size: Fraction of samples for the validation split.
        part_size: Fraction of the dataset to use.
        **kwargs: Ignored; accepted for interface compatibility.
    """
    self.job_dir = Path(job_dir).resolve()
    # Create the job dir up-front so the logger can write into it.
    self.job_dir.mkdir(parents=True, exist_ok=True)
    self.image_dir = Path(image_dir)
    # Fix: this assignment was duplicated in the original code.
    self.samples_file = Path(samples_file)
    self.min_class_size = min_class_size
    self.test_size = test_size
    self.val_size = val_size
    self.part_size = part_size
    # Populated by later pipeline steps.
    self.class_mapping: Optional[dict] = None
    self.valid_image_ids: Optional[List[str]] = None
    self.train_samples = None
    self.val_samples = None
    self.test_samples = None
    self.logger = get_logger(__name__, self.job_dir)
    self.samples = load_json(self.samples_file)
def test_split_samples_half_2(self):
    """Split with part_size 2/3: train share is the remainder, 0.4."""
    global dp
    dp.samples = load_json(TEST_SPLIT_FILE)
    dp.test_size = 0.2
    dp.val_size = 0.4
    dp.part_size = 2 / 3
    dp._split_samples()
    npt.assert_almost_equal(dp.train_size, 0.4)
    assert dp.test_size + dp.val_size + dp.train_size == 1.0

    # (split size, per-label counts) expected for each partition.
    expected = {
        'test': (27, {1: 14, 2: 8, 3: 5}),
        'val': (53, {1: 26, 2: 16, 3: 11}),
        'train': (53, {1: 27, 2: 16, 3: 10}),
    }
    splits = {
        'test': dp.test_samples,
        'val': dp.val_samples,
        'train': dp.train_samples,
    }
    for name, (size, label_counts) in expected.items():
        samples = splits[name]
        assert len(samples) == size
        counts = Counter(sample['label'] for sample in samples)
        for label, count in label_counts.items():
            assert counts[label] == count
def test_split_samples_half(self):
    """Split with part_size 1/2: train share is the remainder, 0.3."""
    global dp
    dp.samples = load_json(TEST_SPLIT_FILE)
    dp.test_size = 0.2
    dp.val_size = 0.5
    dp.part_size = 1 / 2
    dp._split_samples()
    npt.assert_almost_equal(dp.train_size, 0.3)
    assert dp.test_size + dp.val_size + dp.train_size == 1.0

    # (split size, per-label counts) expected for each partition.
    expected = {
        'test': (20, {1: 10, 2: 6, 3: 4}),
        'val': (50, {1: 25, 2: 15, 3: 10}),
        'train': (30, {1: 15, 2: 9, 3: 6}),
    }
    splits = {
        'test': dp.test_samples,
        'val': dp.val_samples,
        'train': dp.train_samples,
    }
    for name, (size, label_counts) in expected.items():
        samples = splits[name]
        assert len(samples) == size
        counts = Counter(sample['label'] for sample in samples)
        for label, count in label_counts.items():
            assert counts[label] == count
def test_split_samples_full_2(self):
    """Split with part_size 1.0: train share is the remainder, 0.7."""
    global dp
    dp.samples = load_json(TEST_SPLIT_FILE)
    dp.test_size = 0.2
    dp.val_size = 0.1
    dp.part_size = 1.0
    dp._split_samples()
    npt.assert_almost_equal(dp.train_size, 0.7)
    assert dp.test_size + dp.val_size + dp.train_size == 1.0

    # (split size, per-label counts) expected for each partition.
    expected = {
        'test': (40, {1: 20, 2: 12, 3: 8}),
        'val': (20, {1: 10, 2: 6, 3: 4}),
        'train': (140, {1: 70, 2: 42, 3: 28}),
    }
    splits = {
        'test': dp.test_samples,
        'val': dp.val_samples,
        'train': dp.train_samples,
    }
    for name, (size, label_counts) in expected.items():
        samples = splits[name]
        assert len(samples) == size
        counts = Counter(sample['label'] for sample in samples)
        for label, count in label_counts.items():
            assert counts[label] == count
def test__validate_samples_2(self, mocker):
    """Validation logs each dropped sample and the final class distribution."""
    mp_logger_info = mocker.patch('logging.Logger.info')
    global dp
    dp.valid_image_ids = ['1.jpg', '2.jpg', '3.jpg', '4.jpg']
    dp.samples = load_json(TEST_INT_FILE)
    dp._validate_samples()

    # Samples expected to be reported as dropped, as (image_id, label).
    dropped = [
        ('5.jpg', 1), ('6.jpg', 1), ('7.jpg', 2), ('8.jpg', 1),
        ('9.jpg', 1), ('10.jpg', 2), ('11.jpg', 1), ('12.jpg', 1),
        ('13.jpg', 1), ('14.jpg', 2),
    ]
    expected_calls = [
        call('\n****** Running samples validation ******\n'),
        call('The following samples were dropped:'),
    ]
    expected_calls += [
        call(f"- {{'image_id': {image_id!r}, 'label': {label}}}")
        for image_id, label in dropped
    ]
    expected_calls += [
        call('NOTE: 26 samples were identified as invalid.\n'
             'The full list of invalid samples will be saved in job dir.\n'),
        call('Class distribution after validation:'),
        call('1: 2 (50.0%)'),
        call('2: 2 (50.0%)'),
    ]
    mp_logger_info.assert_has_calls(expected_calls)
def test_save_json(self):
    """save_json writes data that round-trips unchanged through load_json."""
    payload = [
        {'image_id': 'helmet_1.jpg', 'label': 'left'},
        {'image_id': 'helmet_2.jpg', 'label': 'left'},
        {'image_id': 'image_png.png', 'label': 'right'},
    ]
    target_file = TEST_TARGET_FILE
    # The target must not pre-exist, otherwise the round-trip proves nothing.
    assert target_file.exists() is False
    save_json(payload, target_file)
    assert target_file.exists()
    assert load_json(target_file) == payload
    # Clean up so the test stays repeatable.
    target_file.unlink()
def test__create_class_mapping(self):
    """Class mapping assigns stable indices regardless of sample order.

    Covers string labels and integer labels; reversing the sample list
    must not change the resulting mapping.
    """
    global dp
    # String labels.
    dp.samples = load_json(TEST_STR_FILE)
    dp.samples_count = {'left': 3, 'right': 1}
    dp._create_class_mapping()
    expected = {0: 'left', 1: 'right'}
    assert dp.class_mapping == expected
    # Sample order must not matter.
    dp.samples = dp.samples[::-1]
    dp._create_class_mapping()
    assert dp.class_mapping == expected

    # Integer labels.
    dp.samples = load_json(TEST_INT_FILE)
    dp.samples_count = {1: 10, 2: 20}
    dp._create_class_mapping()
    # Fix: removed leftover debug print(dp.class_mapping).
    expected = {0: 1, 1: 2}
    assert dp.class_mapping == expected
    dp.samples = dp.samples[::-1]
    dp._create_class_mapping()
    assert dp.class_mapping == expected
def test__apply_class_mapping(self):
    """Applying the class mapping replaces label names with their indices."""
    global dp
    dp.samples = load_json(TEST_STR_FILE)
    dp.class_mapping = {0: 'left', 1: 'right'}
    dp._apply_class_mapping()
    assert dp.samples == load_json(TEST_FILE_STR2INT)

    # Keys other than 'image_id'/'label' do not survive the mapping
    # (the expected result below has no 'test' key).
    dp.samples = [
        {'image_id': 'helmet_2.jpg', 'label': 'left'},
        {'image_id': 'image_png.png', 'label': 'right', 'test': 'abc'},
    ]
    dp._apply_class_mapping()
    assert dp.samples == [
        {'image_id': 'helmet_2.jpg', 'label': 0},
        {'image_id': 'image_png.png', 'label': 1},
    ]

    # Sanity check: unmapped samples are not equal to a wrongly swapped mapping.
    dp.samples = [
        {'image_id': 'helmet_2.jpg', 'label': 'left'},
        {'image_id': 'image_png.png', 'label': 'right'},
    ]
    assert dp.samples != [
        {'image_id': 'helmet_2.jpg', 'label': 1},
        {'image_id': 'image_png.png', 'label': 0},
    ]

    # Already-mapped samples compare equal to the same integer labels.
    dp.samples = [
        {'image_id': 'helmet_2.jpg', 'label': 1},
        {'image_id': 'image_png.png', 'label': 0},
    ]
    assert dp.samples == [
        {'image_id': 'helmet_2.jpg', 'label': 1},
        {'image_id': 'image_png.png', 'label': 0},
    ]