def __init__(
    self,
    image_dir: str,
    job_dir: str,
    batch_size: int = BATCH_SIZE,
    base_model_name: str = BASE_MODEL_NAME,
    **kwargs,
) -> None:
    """Inits evaluation component.

    Loads the best model from the job directory.
    Creates the evaluation directory if the app was started from the command line.
    """
    self.image_dir = Path(image_dir).resolve()
    self.job_dir = Path(job_dir).resolve()
    self.batch_size = batch_size
    self.base_model_name = base_model_name
    self.logger = get_logger(__name__, self.job_dir)

    self.samples_test: list = load_json(self.job_dir / 'test_samples.json')  # type: ignore
    self.class_mapping: dict = load_json(self.job_dir / 'class_mapping.json')  # type: ignore

    self.n_classes = len(self.class_mapping)
    self.classes = [str(self.class_mapping[str(i)]) for i in range(self.n_classes)]
    self.y_true = np.array([sample['label'] for sample in self.samples_test])

    self._determine_plot_params()
    self._load_best_model()
    self._create_evaluation_dir()
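
# Usage sketch — a minimal example of constructing this component. The enclosing
# class name is not shown in this excerpt; `Evaluation` below is an assumption
# used for illustration. The job directory must already contain
# 'test_samples.json' and 'class_mapping.json' (loaded above), typically written
# by the earlier data-preparation and training steps.
#
#     evaluator = Evaluation(
#         image_dir='data/images',
#         job_dir='jobs/demo',
#         batch_size=16,
#     )
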
def __init__(
    self,
    tf_dir: str,
    region: str,
    instance_type: str,
    vpc_id: str,
    s3_bucket: str,
    job_dir: str,
    cloud_tag: str,
    **kwargs,
) -> None:
    """Inits cloud component.

    Sets *remote workdir* and ensures that *s3 bucket prefix* is correct.
    """
    self.tf_dir = tf_dir
    self.region = region
    self.instance_type = instance_type
    self.vpc_id = vpc_id
    # needed for IAM setup; bucket will not be created by terraform
    self.s3_bucket = URL(s3_bucket)
    self.cloud_tag = cloud_tag

    if 's3://' in str(job_dir):
        self.job_dir = URL(job_dir)
    else:
        self.job_dir = Path(job_dir).resolve()

    self.image_dir: Optional[Path] = None
    self.ssh: Optional[str] = None
    self.remote_workdir = Path('/home/ec2-user/image-atm').resolve()

    self._check_s3_prefix()
    self.logger = get_logger(__name__, Path(self.job_dir))
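
# Usage sketch — the class name `AWS` below is an assumption used for
# illustration (it is not part of this excerpt); the EC2 home directory,
# Terraform, and VPC parameters above imply an AWS backend. `job_dir` may be a
# local path or an s3:// URI; the branch above picks URL or Path accordingly:
#
#     cloud = AWS(
#         tf_dir='cloud/aws',
#         region='eu-west-1',
#         instance_type='p2.xlarge',
#         vpc_id='vpc-0123456789abcdef0',
#         s3_bucket='s3://my-imageatm-bucket',
#         job_dir='s3://my-imageatm-bucket/jobs/demo',
#         cloud_tag='my-project',
#     )
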
def __init__(
    self,
    job_dir: str,
    image_dir: str,
    samples_file: str,
    min_class_size: int = MIN_CLASS_SIZE,
    test_size: float = TEST_SIZE,
    val_size: float = VALIDATION_SIZE,
    part_size: float = PART_SIZE,
    **kwargs,
) -> None:
    """Inits data preparation component.

    Loads samples file. Initializes variables for further operations:
    *valid_image_ids*, *class_mapping*, *train_samples*, *val_samples*,
    *test_samples*.
    """
    self.job_dir = Path(job_dir).resolve()
    if not self.job_dir.exists():
        os.makedirs(self.job_dir)
    self.image_dir = Path(image_dir)
    self.samples_file = Path(samples_file)
    self.min_class_size = min_class_size
    self.test_size = test_size
    self.val_size = val_size
    self.part_size = part_size

    self.class_mapping: Optional[dict] = None
    self.valid_image_ids: Optional[List[str]] = None
    self.train_samples = None
    self.val_samples = None
    self.test_samples = None

    self.logger = get_logger(__name__, self.job_dir)
    self.samples = load_json(self.samples_file)
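
# Usage sketch — `DataPrep` is an assumed class name for illustration. The
# samples file is a JSON list whose entries carry at least a 'label' key (the
# evaluation component above reads sample['label']); the 'image_id' key shown
# here is an assumption suggested by *valid_image_ids*:
#
#     # samples.json: [{"image_id": "img_001.jpg", "label": "cat"}, ...]
#     dp = DataPrep(
#         job_dir='jobs/demo',
#         image_dir='data/images',
#         samples_file='samples.json',
#         test_size=0.2,
#         val_size=0.1,
#     )
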
def __init__(
    self,
    image_dir: str,
    job_dir: str,
    epochs_train_dense: typing.Union[int, str] = EPOCHS_TRAIN_DENSE,
    epochs_train_all: typing.Union[int, str] = EPOCHS_TRAIN_ALL,
    learning_rate_dense: typing.Union[float, str] = LEARNING_RATE_DENSE,
    learning_rate_all: typing.Union[float, str] = LEARNING_RATE_ALL,
    batch_size: typing.Union[int, str] = BATCH_SIZE,
    dropout_rate: typing.Union[float, str] = DROPOUT_RATE,
    base_model_name: str = BASE_MODEL_NAME,
    loss: str = LOSS,
    **kwargs,
) -> None:
    """Inits training component.

    Loads train/validation samples and the class mapping from the job
    directory. Hyperparameters may be passed as strings and are cast to
    numeric types.
    """
    self.image_dir = Path(image_dir).resolve()
    self.job_dir = Path(job_dir).resolve()
    self.logger = get_logger(__name__, self.job_dir)

    self.samples_train = load_json(self.job_dir / 'train_samples.json')
    self.samples_val = load_json(self.job_dir / 'val_samples.json')
    self.class_mapping = load_json(self.job_dir / 'class_mapping.json')
    self.n_classes = len(self.class_mapping)

    # Cast hyperparameters that may have arrived as strings.
    self.epochs_train_dense = int(epochs_train_dense)
    self.epochs_train_all = int(epochs_train_all)
    self.learning_rate_dense = float(learning_rate_dense)
    self.learning_rate_all = float(learning_rate_all)
    self.batch_size = int(batch_size)
    self.dropout_rate = float(dropout_rate)

    self.base_model_name = base_model_name
    self.loss = loss
    self.use_multiprocessing, self.workers = use_multiprocessing()
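
# Usage sketch — `Training` is an assumed class name for illustration. Because
# the constructor casts hyperparameters with int()/float(), they can be passed
# as numbers or as strings (e.g. when forwarded from a config file or
# environment variables):
#
#     trainer = Training(
#         image_dir='data/images',
#         job_dir='jobs/demo',
#         epochs_train_dense='2',      # string, cast to int above
#         learning_rate_all='0.0001',  # string, cast to float above
#         batch_size=32,               # a plain int works as well
#     )
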
def __init__(
    self,
    tf_dir: str,
    region: str,
    instance_type: str,
    vpc_id: str,
    s3_bucket: str,
    job_dir: str,
    cloud_tag: str,
    **kwargs,
) -> None:
    """Inits cloud component.

    Sets *remote workdir* and ensures that *s3 bucket prefix* is correct.
    """
    self.tf_dir = tf_dir
    self.region = region
    self.instance_type = instance_type
    self.vpc_id = vpc_id
    # needed for IAM setup; bucket will not be created by terraform
    self.s3_bucket = s3_bucket
    self.job_dir = os.path.abspath(job_dir)
    self.cloud_tag = cloud_tag

    self.image_dir: Optional[str] = None
    self.ssh: Optional[str] = None
    self.remote_workdir = '/home/ec2-user/image-atm'

    self._check_s3_prefix()
    self.logger = get_logger(__name__, Path(self.job_dir))
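
# `_check_s3_prefix` is called in both cloud variants above, but its body is not
# part of this excerpt. A minimal sketch of what such a check might look like —
# purely an assumption, not the actual implementation:
#
#     def _check_s3_prefix(self) -> None:
#         # Normalize the bucket reference into an s3:// URI.
#         if not str(self.s3_bucket).startswith('s3://'):
#             self.s3_bucket = 's3://' + str(self.s3_bucket)
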
def pipeline(
    config: Config,
    config_file: Path,
    job_dir: Optional[Path] = None,
    image_dir: Optional[Path] = None,
    samples_file: Optional[Path] = None,
    provider: Optional[str] = None,
    instance_type: Optional[str] = None,
    region: Optional[str] = None,
    vpc_id: Optional[str] = None,
    bucket: Optional[str] = None,
    tf_dir: Optional[Path] = None,
    train_cloud: Optional[bool] = None,
    destroy: Optional[bool] = None,
    resize: Optional[bool] = None,
    batch_size: Optional[int] = None,
    learning_rate_dense: Optional[float] = None,
    learning_rate_all: Optional[float] = None,
    epochs_train_dense: Optional[int] = None,
    epochs_train_all: Optional[int] = None,
    base_model_name: Optional[str] = None,
    cloud_tag: Optional[str] = None,
    create_report: Optional[bool] = None,
    kernel_name: Optional[str] = None,
    export_html: Optional[bool] = None,
    export_pdf: Optional[bool] = None,
):
    """Runs the entire pipeline based on config file."""
    config = update_config(
        config=config,
        config_file=config_file,
        job_dir=job_dir,
        image_dir=image_dir,
        samples_file=samples_file,
        provider=provider,
        instance_type=instance_type,
        region=region,
        vpc_id=vpc_id,
        bucket=bucket,
        tf_dir=tf_dir,
        train_cloud=train_cloud,
        destroy=destroy,
        resize=resize,
        batch_size=batch_size,
        learning_rate_dense=learning_rate_dense,
        learning_rate_all=learning_rate_all,
        epochs_train_dense=epochs_train_dense,
        epochs_train_all=epochs_train_all,
        base_model_name=base_model_name,
        cloud_tag=cloud_tag,
        create_report=create_report,
        kernel_name=kernel_name,
        export_html=export_html,
        export_pdf=export_pdf,
    )

    validate_config(config, config.pipeline)

    Path(config.job_dir).resolve().mkdir(parents=True, exist_ok=True)
    logger = get_logger(__name__, config.job_dir)  # type: ignore

    if 'dataprep' in config.pipeline:
        from imageatm.scripts import run_dataprep

        logger.info(
            '\n********************************\n'
            '******* Data preparation *******\n'
            '********************************'
        )
        dp = run_dataprep(**config.dataprep)

        # update image_dir if images were resized
        if config.dataprep.get('resize', False):
            config.image_dir = dp.image_dir  # type: ignore
            config = update_component_configs(config)

    if 'train' in config.pipeline:
        logger.info(
            '\n********************************\n'
            '*********** Training ***********\n'
            '********************************'
        )
        if config.train.get('cloud'):
            from imageatm.scripts import run_training_cloud

            run_training_cloud(**{**config.cloud, **config.train})
        else:
            from imageatm.scripts import run_training

            run_training(**config.train)

    if 'evaluate' in config.pipeline:
        from imageatm.scripts import run_evaluation

        logger.info(
            '\n********************************\n'
            '********** Evaluation **********\n'
            '********************************'
        )
        run_evaluation(**config.evaluate)

    if 'cloud' in config.pipeline:
        from imageatm.scripts import run_cloud

        run_cloud(**config.cloud)
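
# Usage sketch — a minimal invocation of the pipeline. Assumptions: `Config`
# exposes a `pipeline` attribute listing the steps to run (implied by the
# membership checks above), and `update_config` merges the keyword overrides
# into the file-based configuration; the exact `Config` constructor signature
# is not shown in this excerpt:
#
#     config = Config()
#     pipeline(
#         config=config,
#         config_file=Path('config.yml'),
#         batch_size=32,  # overrides the value from config.yml
#     )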