Example 1
    def __init__(self,
                 image_dir: str,
                 job_dir: str,
                 batch_size: int = BATCH_SIZE,
                 base_model_name: str = BASE_MODEL_NAME,
                 **kwargs) -> None:
        """Inits evaluation component.

        Loads the best model from the job directory.
        Creates the evaluation directory if the app was started from the command line.
        """
        self.image_dir = Path(image_dir).resolve()
        self.job_dir = Path(job_dir).resolve()
        self.batch_size = batch_size
        self.base_model_name = base_model_name

        self.logger = get_logger(__name__, self.job_dir)
        self.samples_test: list = load_json(
            self.job_dir / 'test_samples.json')  # type: ignore
        self.class_mapping: dict = load_json(
            self.job_dir / 'class_mapping.json')  # type: ignore
        self.n_classes = len(self.class_mapping)
        self.classes = [
            str(self.class_mapping[str(i)]) for i in range(self.n_classes)
        ]
        self.y_true = np.array([i['label'] for i in self.samples_test])

        self._determine_plot_params()
        self._load_best_model()
        self._create_evaluation_dir()
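
A minimal usage sketch (the enclosing class name, here Evaluation, is an assumption; it is not shown in the snippet). The job directory must already contain test_samples.json and class_mapping.json, which this __init__ loads:

# Hypothetical usage; Evaluation and all paths below are placeholders.
evaluation = Evaluation(
    image_dir='data/images',
    job_dir='jobs/run_1',
    batch_size=16,
)
# After __init__ the best model has been loaded and evaluation.y_true holds
# the integer test labels in sample order.
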
Example 2
    def __init__(self, tf_dir: str, region: str, instance_type: str,
                 vpc_id: str, s3_bucket: str, job_dir: str, cloud_tag: str,
                 **kwargs) -> None:
        """Inits cloud component.

        Sets the *remote workdir* and ensures that the *s3 bucket* prefix is correct.
        """
        self.tf_dir = tf_dir
        self.region = region
        self.instance_type = instance_type
        self.vpc_id = vpc_id
        # needed for IAM setup; the bucket will not be created by Terraform
        self.s3_bucket = URL(s3_bucket)
        self.cloud_tag = cloud_tag
        if str(job_dir).startswith('s3://'):
            self.job_dir = URL(job_dir)
        else:
            self.job_dir = Path(job_dir).resolve()

        self.image_dir: Optional[Path] = None
        self.ssh: Optional[str] = None
        self.remote_workdir = Path('/home/ec2-user/image-atm').resolve()

        self._check_s3_prefix()

        self.logger = get_logger(__name__, Path(self.job_dir))
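
A usage sketch for this component (the class name AWS is an assumption, as is the URL type accepting s3:// strings). A local job_dir becomes a resolved Path; an s3:// one becomes a URL:

# Hypothetical usage; every value below is a placeholder.
cloud = AWS(
    tf_dir='cloud/aws',
    region='eu-west-1',
    instance_type='t2.micro',
    vpc_id='vpc-0123456789abcdef0',
    s3_bucket='s3://my-bucket/image-atm',
    job_dir='jobs/run_1',  # or 's3://my-bucket/jobs/run_1'
    cloud_tag='image-atm-demo',
)
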
Example 3
    def __init__(self,
                 job_dir: str,
                 image_dir: str,
                 samples_file: str,
                 min_class_size: int = MIN_CLASS_SIZE,
                 test_size: float = TEST_SIZE,
                 val_size: float = VALIDATION_SIZE,
                 part_size: float = PART_SIZE,
                 **kwargs) -> None:
        """Inits data preparation component.

        Loads the samples file. Initializes variables for further operations:
        *valid_image_ids*, *class_mapping*, *train_samples*, *val_samples*, *test_samples*.
        """
        self.job_dir = Path(job_dir).resolve()
        self.job_dir.mkdir(parents=True, exist_ok=True)

        self.image_dir = Path(image_dir)
        self.samples_file = Path(samples_file)
        self.min_class_size = min_class_size
        self.test_size = test_size
        self.val_size = val_size
        self.part_size = part_size

        self.class_mapping: Optional[dict] = None
        self.valid_image_ids: Optional[List[str]] = None
        self.train_samples = None
        self.val_samples = None
        self.test_samples = None

        self.logger = get_logger(__name__, self.job_dir)
        self.samples = load_json(self.samples_file)
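
A usage sketch (the class name DataPrep is an assumption). After __init__ the job directory exists, self.samples holds the parsed JSON, and the split attributes are still None:

# Hypothetical usage; DataPrep and the paths are placeholders.
dataprep = DataPrep(
    job_dir='jobs/run_1',
    image_dir='data/images',
    samples_file='data/samples.json',
    test_size=0.2,
    val_size=0.1,
)
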
Example 4
    def __init__(
        self,
        image_dir: str,
        job_dir: str,
        epochs_train_dense: typing.Union[int, str] = EPOCHS_TRAIN_DENSE,
        epochs_train_all: typing.Union[int, str] = EPOCHS_TRAIN_ALL,
        learning_rate_dense: typing.Union[float, str] = LEARNING_RATE_DENSE,
        learning_rate_all: typing.Union[float, str] = LEARNING_RATE_ALL,
        batch_size: typing.Union[int, str] = BATCH_SIZE,
        dropout_rate: typing.Union[float, str] = DROPOUT_RATE,
        base_model_name: str = BASE_MODEL_NAME,
        loss: str = LOSS,
        **kwargs,
    ) -> None:
        """Inits training component.

        Loads train and validation samples and the class mapping from the job
        directory. Casts hyperparameters that may be passed as strings.
        """
        self.image_dir = Path(image_dir).resolve()
        self.job_dir = Path(job_dir).resolve()

        self.logger = get_logger(__name__, self.job_dir)
        self.samples_train = load_json(self.job_dir / 'train_samples.json')
        self.samples_val = load_json(self.job_dir / 'val_samples.json')
        self.class_mapping = load_json(self.job_dir / 'class_mapping.json')
        self.n_classes = len(self.class_mapping)

        self.epochs_train_dense = int(epochs_train_dense)
        self.epochs_train_all = int(epochs_train_all)
        self.learning_rate_dense = float(learning_rate_dense)
        self.learning_rate_all = float(learning_rate_all)
        self.batch_size = int(batch_size)
        self.dropout_rate = float(dropout_rate)
        self.base_model_name = base_model_name
        self.loss = loss
        self.use_multiprocessing, self.workers = use_multiprocessing()
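
A usage sketch (the class name Training is an assumption). The Union[int, str] and Union[float, str] annotations let values arrive as strings, e.g. from a config file, and __init__ casts them:

# Hypothetical usage; Training and the values are placeholders.
training = Training(
    image_dir='data/images',
    job_dir='jobs/run_1',
    epochs_train_dense='2',       # cast to int in __init__
    learning_rate_dense='0.001',  # cast to float in __init__
    batch_size=16,
)
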
Example 5
    def __init__(self, tf_dir: str, region: str, instance_type: str,
                 vpc_id: str, s3_bucket: str, job_dir: str, cloud_tag: str,
                 **kwargs) -> None:
        """Inits cloud component.

        Sets the *remote workdir* and ensures that the *s3 bucket* prefix is correct.
        """
        self.tf_dir = tf_dir
        self.region = region
        self.instance_type = instance_type
        self.vpc_id = vpc_id
        # needed for IAM setup; the bucket will not be created by Terraform
        self.s3_bucket = s3_bucket
        self.job_dir = os.path.abspath(job_dir)
        self.cloud_tag = cloud_tag

        self.image_dir: Optional[str] = None
        self.ssh: Optional[str] = None
        self.remote_workdir = '/home/ec2-user/image-atm'

        self._check_s3_prefix()

        self.logger = get_logger(__name__, Path(self.job_dir))
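
Both cloud variants call _check_s3_prefix before creating the logger, but the method itself is not shown. A hypothetical sketch of what such a check might do, assuming its job is to guarantee the s3:// scheme on the bucket name:

    def _check_s3_prefix(self) -> None:
        # Hypothetical sketch, not the original implementation: prepend the
        # s3:// scheme to the bucket name if it is missing.
        if not str(self.s3_bucket).startswith('s3://'):
            self.s3_bucket = 's3://' + str(self.s3_bucket)
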
Example 6
def pipeline(
    config: Config,
    config_file: Path,
    job_dir: Optional[Path] = None,
    image_dir: Optional[Path] = None,
    samples_file: Optional[Path] = None,
    provider: Optional[str] = None,
    instance_type: Optional[str] = None,
    region: Optional[str] = None,
    vpc_id: Optional[str] = None,
    bucket: Optional[str] = None,
    tf_dir: Optional[Path] = None,
    train_cloud: Optional[bool] = None,
    destroy: Optional[bool] = None,
    resize: Optional[bool] = None,
    batch_size: Optional[int] = None,
    learning_rate_dense: Optional[float] = None,
    learning_rate_all: Optional[float] = None,
    epochs_train_dense: Optional[int] = None,
    epochs_train_all: Optional[int] = None,
    base_model_name: Optional[str] = None,
    cloud_tag: Optional[str] = None,
    create_report: Optional[bool] = None,
    kernel_name: Optional[str] = None,
    export_html: Optional[bool] = None,
    export_pdf: Optional[bool] = None,
):
    """Runs the entire pipeline based on config file."""
    config = update_config(
        config=config,
        config_file=config_file,
        job_dir=job_dir,
        image_dir=image_dir,
        samples_file=samples_file,
        provider=provider,
        instance_type=instance_type,
        region=region,
        vpc_id=vpc_id,
        bucket=bucket,
        tf_dir=tf_dir,
        train_cloud=train_cloud,
        destroy=destroy,
        resize=resize,
        batch_size=batch_size,
        learning_rate_dense=learning_rate_dense,
        learning_rate_all=learning_rate_all,
        epochs_train_dense=epochs_train_dense,
        epochs_train_all=epochs_train_all,
        base_model_name=base_model_name,
        cloud_tag=cloud_tag,
        create_report=create_report,
        kernel_name=kernel_name,
        export_html=export_html,
        export_pdf=export_pdf,
    )

    validate_config(config, config.pipeline)

    Path(config.job_dir).resolve().mkdir(parents=True, exist_ok=True)

    logger = get_logger(__name__, config.job_dir)  # type: ignore

    if 'dataprep' in config.pipeline:
        from imageatm.scripts import run_dataprep

        logger.info('\n********************************\n'
                    '******* Data preparation *******\n'
                    '********************************')

        dp = run_dataprep(**config.dataprep)

        # update image_dir if images were resized
        if config.dataprep.get('resize', False):
            config.image_dir = dp.image_dir  # type: ignore
            config = update_component_configs(config)

    if 'train' in config.pipeline:
        logger.info('\n********************************\n'
                    '*********** Training ***********\n'
                    '********************************')

        if config.train.get('cloud'):
            from imageatm.scripts import run_training_cloud

            run_training_cloud(**{**config.cloud, **config.train})
        else:
            from imageatm.scripts import run_training

            run_training(**config.train)

    if 'evaluate' in config.pipeline:
        from imageatm.scripts import run_evaluation

        logger.info('\n********************************\n'
                    '********** Evaluation **********\n'
                    '********************************')

        run_evaluation(**config.evaluate)

    if 'cloud' in config.pipeline:
        from imageatm.scripts import run_cloud

        run_cloud(**config.cloud)
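
A sketch of how the entry point might be invoked. Config, update_config, and validate_config come from the snippet itself; the attribute layout of Config (.pipeline plus per-step dicts such as .dataprep and .train) is inferred from the accesses above, and a default-constructible Config is an assumption:

# Hypothetical invocation; the Config construction is a placeholder.
config = Config()
config.pipeline = ['dataprep', 'train', 'evaluate']

pipeline(
    config=config,
    config_file=Path('config.yml'),
    image_dir=Path('data/images'),
    job_dir=Path('jobs/run_1'),
    batch_size=16,
)
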