Example #1
def load_augs(cfg: DictConfig) -> A.Compose:
    """
    Load albumentations
    Args:
        cfg:
    Returns:
        compose object
    """
    augs = []
    for a in cfg:
        if a['class_name'] == 'albumentations.OneOf':
            small_augs = []
            for small_aug in a['params']:
                # YAML can't contain tuples, so convert ListConfig values to tuples manually
                params = {k: (tuple(v) if isinstance(v, omegaconf.listconfig.ListConfig) else v)
                          for k, v in small_aug['params'].items()}
                aug = load_obj(small_aug['class_name'])(**params)
                small_augs.append(aug)
            aug = load_obj(a['class_name'])(small_augs)
            augs.append(aug)

        else:
            params = {k: (tuple(v) if isinstance(v, omegaconf.listconfig.ListConfig) else v)
                      for k, v in a['params'].items()}
            aug = load_obj(a['class_name'])(**params)
            augs.append(aug)

    return A.Compose(augs)
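
A hedged usage sketch for load_augs: the config shape below is illustrative, and load_obj is a minimal stand-in for the project's dynamic-import helper, which lives elsewhere in the repo.

import importlib

import albumentations as A
import omegaconf
from omegaconf import OmegaConf


def load_obj(obj_path: str):
    # Stand-in for the repo's helper: 'albumentations.Resize' -> the Resize class.
    module_path, obj_name = obj_path.rsplit('.', 1)
    return getattr(importlib.import_module(module_path), obj_name)


# Illustrative augmentations; rotate_limit is a YAML list and becomes a tuple inside load_augs.
aug_cfg = OmegaConf.create([
    {'class_name': 'albumentations.Resize', 'params': {'height': 224, 'width': 224}},
    {'class_name': 'albumentations.ShiftScaleRotate', 'params': {'rotate_limit': [-30, 30], 'p': 0.5}},
    {'class_name': 'albumentations.OneOf', 'params': [
        {'class_name': 'albumentations.HorizontalFlip', 'params': {'p': 0.5}},
        {'class_name': 'albumentations.VerticalFlip', 'params': {'p': 0.5}},
    ]},
])
transforms = load_augs(aug_cfg)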
Example #2
    def __init__(self, hparams: Dict[str, float], cfg: DictConfig):
        super().__init__()
        self.cfg = cfg
        self.hparams: Dict[str, float] = hparams
        self.model = load_obj(cfg.model.class_name)(cfg=cfg)
        if not cfg.metric.params:
            self.metric = load_obj(cfg.metric.class_name)()
        else:
            self.metric = load_obj(cfg.metric.class_name)(**cfg.metric.params)
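
The constructor dispatches on whether cfg.metric.params is empty. A minimal sketch of a matching config fragment, assuming a torchmetrics metric (the class name and params are illustrative, not the project's defaults); load_obj is the stand-in defined in the sketch under Example #1.

from omegaconf import OmegaConf

cfg = OmegaConf.create({
    'metric': {
        'class_name': 'torchmetrics.Accuracy',
        'params': {'task': 'multiclass', 'num_classes': 10},  # requires torchmetrics >= 0.11
    }
})

# Mirrors the branch in __init__: empty params -> instantiate with defaults.
metric_cls = load_obj(cfg.metric.class_name)
metric = metric_cls(**cfg.metric.params) if cfg.metric.params else metric_cls()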
Example #3
    def configure_optimizers(self):
        if 'decoder_lr' in self.cfg.optimizer.params:
            # per-module learning rates: decoder_lr for the decoder, base lr for the encoder
            params = [
                {'params': self.model.decoder.parameters(), 'lr': self.cfg.optimizer.params.decoder_lr},
                {'params': self.model.encoder.parameters(), 'lr': self.cfg.optimizer.params.lr},
            ]
            optimizer = load_obj(self.cfg.optimizer.class_name)(params)

        else:
            optimizer = load_obj(self.cfg.optimizer.class_name)(self.model.parameters(), **self.cfg.optimizer.params)
        scheduler = load_obj(self.cfg.scheduler.class_name)(optimizer, **self.cfg.scheduler.params)

        return (
            [optimizer],
            [{'scheduler': scheduler, 'interval': self.cfg.scheduler.step, 'monitor': self.cfg.scheduler.monitor}],
        )
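
For reference, a config fragment that exercises the plain (non-decoder) branch, using stock torch classes; the key names (params, step, monitor) follow the attributes read above, while the concrete values are assumptions. load_obj is again the stand-in from the first sketch.

import torch
from omegaconf import OmegaConf

cfg = OmegaConf.create({
    'optimizer': {
        'class_name': 'torch.optim.AdamW',
        'params': {'lr': 1e-3, 'weight_decay': 1e-2},
    },
    'scheduler': {
        'class_name': 'torch.optim.lr_scheduler.ReduceLROnPlateau',
        'step': 'epoch',
        'monitor': 'main_score',
        'params': {'mode': 'max', 'factor': 0.1, 'patience': 5},
    },
})

model = torch.nn.Linear(4, 2)  # placeholder model
optimizer = load_obj(cfg.optimizer.class_name)(model.parameters(), **cfg.optimizer.params)
scheduler = load_obj(cfg.scheduler.class_name)(optimizer, **cfg.scheduler.params)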
Example #4
    def setup(self, stage=None):
        # Imagenette: map WordNet synset ids to class indices 0-9
        mapping_dict = {
            'n01440764': 0,
            'n02102040': 1,
            'n02979186': 2,
            'n03000684': 3,
            'n03028079': 4,
            'n03394916': 5,
            'n03417042': 6,
            'n03425413': 7,
            'n03445777': 8,
            'n03888257': 9,
        }
        train_labels = []
        train_images = []
        for folder in glob.glob(f'{self.cfg.datamodule.path}/train/*'):
            class_name = os.path.basename(os.path.normpath(folder))
            for filename in glob.glob(f'{folder}/*'):
                train_labels.append(mapping_dict[class_name])
                train_images.append(filename)

        val_labels = []
        val_images = []

        for folder in glob.glob(f'{self.cfg.datamodule.path}/val/*'):
            class_name = os.path.basename(os.path.normpath(folder))
            for filename in glob.glob(f'{folder}/*'):
                val_labels.append(mapping_dict[class_name])
                val_images.append(filename)

        if self.cfg.training.debug:
            train_labels = train_labels[:1000]
            train_images = train_images[:1000]
            val_labels = val_labels[:1000]
            val_images = val_images[:1000]

        # dataset class used for both train and valid
        dataset_class = load_obj(self.cfg.datamodule.class_name)

        # initialize augmentations
        train_augs = load_augs(self.cfg['augmentation']['train']['augs'])
        valid_augs = load_augs(self.cfg['augmentation']['valid']['augs'])

        self.train_dataset = dataset_class(
            image_names=train_images,
            labels=train_labels,
            transforms=train_augs,
            mode='train',
            labels_to_ohe=self.cfg.datamodule.labels_to_ohe,
            n_classes=self.cfg.training.n_classes,
        )
        self.valid_dataset = dataset_class(
            image_names=val_images,
            labels=val_labels,
            transforms=valid_augs,
            mode='valid',
            labels_to_ohe=self.cfg.datamodule.labels_to_ohe,
            n_classes=self.cfg.training.n_classes,
        )
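
setup() only builds the two datasets; in a LightningDataModule they are consumed by the dataloader hooks. A hypothetical companion hook (the batch_size/num_workers config keys are assumptions, not taken from the project's config):

from torch.utils.data import DataLoader

    def train_dataloader(self):
        return DataLoader(
            self.train_dataset,
            batch_size=self.cfg.datamodule.batch_size,
            num_workers=self.cfg.datamodule.num_workers,
            shuffle=True,
        )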
Example #5
def get_test_dataset(cfg: DictConfig) -> object:
    """
    Get test dataset
    Args:
        cfg:
    Returns:
        test dataset
    """

    test_img_dir = f'{cfg.data.folder_path}/test'

    valid_augs = load_augs(cfg['augmentation']['valid']['augs'])
    dataset_class = load_obj(cfg.dataset.class_name)

    test_dataset = dataset_class(dataframe=None, mode='test', image_dir=test_img_dir, cfg=cfg, transforms=valid_augs)

    return test_dataset
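
A hedged usage sketch: wrap the returned dataset in a DataLoader for inference (the batch_size/num_workers config keys are assumptions).

from torch.utils.data import DataLoader

test_dataset = get_test_dataset(cfg)
test_loader = DataLoader(
    test_dataset,
    batch_size=cfg.data.batch_size,
    shuffle=False,
    num_workers=cfg.data.num_workers,
)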
Example #6
def get_training_datasets(cfg: DictConfig) -> Dict:
    """
    Get datasets for modelling
    Args:
        cfg: config
    Returns:
        dict with datasets
    """

    train = pd.read_csv(f'{cfg.data.folder_path}/train.csv')

    # bbox is stored as a string like '[x, y, w, h]'; parse it into separate float columns
    train[['x', 'y', 'w', 'h']] = pd.DataFrame(np.stack(train['bbox'].apply(ast.literal_eval))).astype(
        np.float32
    )

    # precalculate some values
    train['x1'] = train['x'] + train['w']
    train['y1'] = train['y'] + train['h']
    train['area'] = train['w'] * train['h']
    train_ids, valid_ids = train_test_split(train['image_id'].unique(), test_size=0.1, random_state=cfg.training.seed)

    # for fast training
    if cfg.training.debug:
        train_ids = train_ids[:10]
        valid_ids = valid_ids[:10]

    train_df = train.loc[train['image_id'].isin(train_ids)]
    valid_df = train.loc[train['image_id'].isin(valid_ids)]

    train_img_dir = f'{cfg.data.folder_path}/train'

    # dataset class used for both train and valid
    dataset_class = load_obj(cfg.dataset.class_name)

    # initialize augmentations
    train_augs = load_augs(cfg['augmentation']['train']['augs'])
    valid_augs = load_augs(cfg['augmentation']['valid']['augs'])

    train_dataset = dataset_class(dataframe=train_df, mode='train', image_dir=train_img_dir, cfg=cfg, transforms=train_augs)

    valid_dataset = dataset_class(dataframe=valid_df, mode='valid', image_dir=train_img_dir, cfg=cfg, transforms=valid_augs)

    return {'train': train_dataset, 'valid': valid_dataset}
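
The bbox parsing line is the least obvious step: the CSV stores each box as the string '[x, y, w, h]'. A self-contained demonstration on toy data (not the competition file):

import ast

import numpy as np
import pandas as pd

train = pd.DataFrame({
    'image_id': ['img_a', 'img_b'],
    'bbox': ['[834.0, 222.0, 56.0, 36.0]', '[226.0, 548.0, 130.0, 58.0]'],
})
# Parse the strings into four float columns, then derive the corner coordinates.
train[['x', 'y', 'w', 'h']] = np.stack(train['bbox'].apply(ast.literal_eval)).astype(np.float32)
train['x1'] = train['x'] + train['w']
train['y1'] = train['y'] + train['h']
print(train[['x', 'y', 'x1', 'y1']])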
Example #7
def run(cfg: DictConfig, new_dir: str) -> None:
    """
    Run pytorch-lightning model
    Args:
        cfg: hydra config
        new_dir: the run path
    """
    # 0. Argument parsing and callback setting
    set_seed(cfg.training.seed)
    hparams = flatten_omegaconf(cfg)

    cfg.callbacks.model_checkpoint.params.filepath = new_dir + cfg.callbacks.model_checkpoint.params.filepath
    callbacks = []
    for callback in cfg.callbacks.other_callbacks:
        if callback.params:
            callback_instance = load_obj(callback.class_name)(**callback.params)
        else:
            callback_instance = load_obj(callback.class_name)()
        callbacks.append(callback_instance)

    # 1. Logger
    loggers = []
    if cfg.logging.log:
        for logger in cfg.logging.loggers:
            loggers.append(load_obj(logger.class_name)(**logger.params))

    # tb_logger = TensorBoardLogger(save_dir=cfg.general.logs_folder_name, name=cfg.general.run_dir)
    # csv_logger = CsvLogger()

    neptune.init('zhanghanduo/lgnet')
    neptune.create_experiment(
        name='first-test',
        params={
            'max_epochs': cfg.training.epochs,
            'batch_size': cfg.training.batch_size.train,
        },
    )

    # 2. Trainer
    trainer = pl.Trainer(
        logger=loggers,
        early_stop_callback=EarlyStopping(
            **cfg.callbacks.early_stopping.params),
        checkpoint_callback=ModelCheckpoint(
            **cfg.callbacks.model_checkpoint.params),
        callbacks=callbacks,
        **cfg.trainer,
    )
    # 3. Model
    model = load_obj(cfg.training.lightning_module_name)(hparams=hparams, cfg=cfg)
    # 4. Data Module
    dm = load_obj(cfg.training.data_module_name)(hparams=hparams, cfg=cfg)

    trainer.fit(model, dm)

    if cfg.general.save_pytorch_model:
        # save as a simple torch model
        model_name = f"{cfg.general.run_dir}/saved_models/{cfg.general.run_dir.split('/')[-1]}.pth"
        print(model_name)
        torch.save(model.model.state_dict(), model_name)

    neptune.stop()
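
run() expects an already-composed hydra config plus a run directory. A sketch of a typical entry point, assuming hydra 1.x defaults; config_path and config_name are guesses about the project layout.

import os

import hydra
from omegaconf import DictConfig


@hydra.main(config_path='conf', config_name='config')
def main(cfg: DictConfig) -> None:
    # Hydra changes the working directory to the run dir; pass it to run().
    run(cfg, new_dir=os.getcwd())


if __name__ == '__main__':
    main()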