def load_augs(cfg: DictConfig) -> A.Compose:
    """
    Load albumentations transforms from the config.

    Args:
        cfg: config with a list of augmentation definitions

    Returns:
        composed albumentations object
    """
    augs = []
    for a in cfg:
        if a['class_name'] == 'albumentations.OneOf':
            small_augs = []
            for small_aug in a['params']:
                # yaml can't contain tuples, so we need to convert manually
                params = {
                    k: (tuple(v) if isinstance(v, omegaconf.listconfig.ListConfig) else v)
                    for k, v in small_aug['params'].items()
                }
                aug = load_obj(small_aug['class_name'])(**params)
                small_augs.append(aug)
            aug = load_obj(a['class_name'])(small_augs)
            augs.append(aug)
        else:
            params = {
                k: (tuple(v) if isinstance(v, omegaconf.listconfig.ListConfig) else v)
                for k, v in a['params'].items()
            }
            aug = load_obj(a['class_name'])(**params)
            augs.append(aug)

    return A.Compose(augs)
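# A minimal usage sketch for load_augs. The config structure below is an
# assumption inferred from the function body (each entry needs 'class_name'
# and 'params'); the exact keys in the project's yaml may differ.
from omegaconf import OmegaConf

example_augs_cfg = OmegaConf.create(
    [
        {'class_name': 'albumentations.Resize', 'params': {'height': 224, 'width': 224}},
        # list values (e.g. mean/std) are ListConfigs and get converted to tuples
        {'class_name': 'albumentations.Normalize',
         'params': {'mean': [0.485, 0.456, 0.406], 'std': [0.229, 0.224, 0.225]}},
    ]
)
example_augs = load_augs(example_augs_cfg)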
def __init__(self, hparams: Dict[str, float], cfg: DictConfig):
    super(LitImageClassification, self).__init__()
    self.cfg = cfg
    self.hparams: Dict[str, float] = hparams
    self.model = load_obj(cfg.model.class_name)(cfg=cfg)
    if not cfg.metric.params:
        self.metric = load_obj(cfg.metric.class_name)()
    else:
        self.metric = load_obj(cfg.metric.class_name)(**cfg.metric.params)
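# load_obj is used throughout but not shown in this section. A minimal sketch
# of the usual implementation (import an object by its dotted path) follows;
# the project's actual helper may differ in details.
import importlib
from typing import Any

def load_obj(obj_path: str) -> Any:
    """Load a class or function given its dotted module path."""
    module_path, obj_name = obj_path.rsplit('.', 1)
    module = importlib.import_module(module_path)
    return getattr(module, obj_name)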
def configure_optimizers(self):
    if 'decoder_lr' in self.cfg.optimizer.params.keys():
        # use a separate learning rate for the decoder
        params = [
            {'params': self.model.decoder.parameters(), 'lr': self.cfg.optimizer.params.decoder_lr},
            {'params': self.model.encoder.parameters(), 'lr': self.cfg.optimizer.params.lr},
        ]
        optimizer = load_obj(self.cfg.optimizer.class_name)(params)
    else:
        optimizer = load_obj(self.cfg.optimizer.class_name)(self.model.parameters(), **self.cfg.optimizer.params)
    scheduler = load_obj(self.cfg.scheduler.class_name)(optimizer, **self.cfg.scheduler.params)

    return (
        [optimizer],
        [{'scheduler': scheduler, 'interval': self.cfg.scheduler.step, 'monitor': self.cfg.scheduler.monitor}],
    )
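# A sketch of the optimizer/scheduler config shape this method expects,
# assuming the OmegaConf layout implied by the attribute accesses above;
# class names, values, and the monitored metric are illustrative only.
example_opt_cfg = OmegaConf.create({
    'optimizer': {
        'class_name': 'torch.optim.AdamW',
        'params': {'lr': 1e-3, 'weight_decay': 1e-2},
    },
    'scheduler': {
        'class_name': 'torch.optim.lr_scheduler.ReduceLROnPlateau',
        'params': {'mode': 'min', 'factor': 0.1, 'patience': 3},
        'step': 'epoch',       # used as the lightning scheduler 'interval'
        'monitor': 'val_loss',  # hypothetical metric name
    },
})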
def setup(self, stage=None):
    # Imagenette: map synset folder names to integer class labels
    mapping_dict = {
        'n01440764': 0,
        'n02102040': 1,
        'n02979186': 2,
        'n03000684': 3,
        'n03028079': 4,
        'n03394916': 5,
        'n03417042': 6,
        'n03425413': 7,
        'n03445777': 8,
        'n03888257': 9,
    }
    train_labels = []
    train_images = []
    for folder in glob.glob(f'{self.cfg.datamodule.path}/train/*'):
        class_name = os.path.basename(os.path.normpath(folder))
        for filename in glob.glob(f'{folder}/*'):
            train_labels.append(mapping_dict[class_name])
            train_images.append(filename)

    val_labels = []
    val_images = []
    for folder in glob.glob(f'{self.cfg.datamodule.path}/val/*'):
        class_name = os.path.basename(os.path.normpath(folder))
        for filename in glob.glob(f'{folder}/*'):
            val_labels.append(mapping_dict[class_name])
            val_images.append(filename)

    if self.cfg.training.debug:
        train_labels = train_labels[:1000]
        train_images = train_images[:1000]
        val_labels = val_labels[:1000]
        val_images = val_images[:1000]

    # train dataset
    dataset_class = load_obj(self.cfg.datamodule.class_name)

    # initialize augmentations
    train_augs = load_augs(self.cfg['augmentation']['train']['augs'])
    valid_augs = load_augs(self.cfg['augmentation']['valid']['augs'])

    self.train_dataset = dataset_class(
        image_names=train_images,
        labels=train_labels,
        transforms=train_augs,
        mode='train',
        labels_to_ohe=self.cfg.datamodule.labels_to_ohe,
        n_classes=self.cfg.training.n_classes,
    )

    self.valid_dataset = dataset_class(
        image_names=val_images,
        labels=val_labels,
        transforms=valid_augs,
        mode='valid',
        labels_to_ohe=self.cfg.datamodule.labels_to_ohe,
        n_classes=self.cfg.training.n_classes,
    )
def get_test_dataset(cfg: DictConfig) -> object:
    """
    Get test dataset

    Args:
        cfg: config with data paths and augmentation definitions

    Returns:
        test dataset
    """
    test_img_dir = f'{cfg.data.folder_path}/test'
    valid_augs = load_augs(cfg['augmentation']['valid']['augs'])
    dataset_class = load_obj(cfg.dataset.class_name)

    test_dataset = dataset_class(dataframe=None, mode='test', image_dir=test_img_dir, cfg=cfg, transforms=valid_augs)

    return test_dataset
def get_training_datasets(cfg: DictConfig) -> Dict:
    """
    Get datasets for modelling

    Args:
        cfg: config

    Returns:
        dict with datasets
    """
    train = pd.read_csv(f'{cfg.data.folder_path}/train.csv')
    # bboxes are stored as strings in the csv, so parse them into float columns
    train[['x', 'y', 'w', 'h']] = pd.DataFrame(
        np.stack(train['bbox'].apply(lambda x: ast.literal_eval(x)))
    ).astype(np.float32)

    # precalculate some values
    train['x1'] = train['x'] + train['w']
    train['y1'] = train['y'] + train['h']
    train['area'] = train['w'] * train['h']
    train_ids, valid_ids = train_test_split(train['image_id'].unique(), test_size=0.1, random_state=cfg.training.seed)

    # for fast training
    if cfg.training.debug:
        train_ids = train_ids[:10]
        valid_ids = valid_ids[:10]

    train_df = train.loc[train['image_id'].isin(train_ids)]
    valid_df = train.loc[train['image_id'].isin(valid_ids)]

    train_img_dir = f'{cfg.data.folder_path}/train'

    # train dataset
    dataset_class = load_obj(cfg.dataset.class_name)

    # initialize augmentations
    train_augs = load_augs(cfg['augmentation']['train']['augs'])
    valid_augs = load_augs(cfg['augmentation']['valid']['augs'])

    train_dataset = dataset_class(
        dataframe=train_df, mode='train', image_dir=train_img_dir, cfg=cfg, transforms=train_augs
    )
    valid_dataset = dataset_class(
        dataframe=valid_df, mode='valid', image_dir=train_img_dir, cfg=cfg, transforms=valid_augs
    )

    return {'train': train_dataset, 'valid': valid_dataset}
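# A worked example of the bbox parsing above, with illustrative values: each
# csv row stores its bbox as a string in [x, y, w, h] format, ast.literal_eval
# turns it back into a list, and np.stack builds an (n, 4) float array.
import ast
import numpy as np

bbox_strings = ['[834.0, 222.0, 56.0, 36.0]', '[226.0, 548.0, 130.0, 58.0]']
parsed = np.stack([ast.literal_eval(s) for s in bbox_strings]).astype(np.float32)
print(parsed.shape)  # (2, 4)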
def run(cfg: DictConfig, new_dir: str) -> None:
    """
    Run pytorch-lightning model

    Args:
        cfg: hydra config
        new_dir: the run path
    """
    # 0. Argument parsing and callback setting
    set_seed(cfg.training.seed)
    hparams = flatten_omegaconf(cfg)
    cfg.callbacks.model_checkpoint.params.filepath = new_dir + cfg.callbacks.model_checkpoint.params.filepath

    callbacks = []
    for callback in cfg.callbacks.other_callbacks:
        if callback.params:
            callback_instance = load_obj(callback.class_name)(**callback.params)
        else:
            callback_instance = load_obj(callback.class_name)()
        callbacks.append(callback_instance)

    # 1. Logger
    loggers = []
    if cfg.logging.log:
        for logger in cfg.logging.loggers:
            loggers.append(load_obj(logger.class_name)(**logger.params))
    # tb_logger = TensorBoardLogger(save_dir=cfg.general.logs_folder_name, name=cfg.general.run_dir)
    # csv_logger = CsvLogger()

    neptune.init('zhanghanduo/lgnet')
    neptune.create_experiment(
        name='first-test',
        params={'max_epochs': cfg.training.epochs, 'batch_size': cfg.training.batch_size.train},  # optional
    )

    # 2. Trainer
    trainer = pl.Trainer(
        logger=loggers,
        early_stop_callback=EarlyStopping(**cfg.callbacks.early_stopping.params),
        checkpoint_callback=ModelCheckpoint(**cfg.callbacks.model_checkpoint.params),
        callbacks=callbacks,
        **cfg.trainer,
    )

    # 3. Model
    model = load_obj(cfg.training.lightning_module_name)(hparams=hparams, cfg=cfg)

    # 4. Data Module
    dm = load_obj(cfg.training.data_module_name)(hparams=hparams, cfg=cfg)
    trainer.fit(model, dm)

    if cfg.general.save_pytorch_model:
        # save as a simple torch model
        model_name = cfg.general.run_dir + '/saved_models/' + cfg.general.run_dir.split('/')[-1] + '.pth'
        print(model_name)
        torch.save(model.model.state_dict(), model_name)

    neptune.stop()
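# A sketch of a hydra entry point for run(). The config path/name and the way
# new_dir is derived are assumptions, not the project's actual main().
import os
import hydra
from omegaconf import DictConfig

@hydra.main(config_path='conf', config_name='config')
def main(cfg: DictConfig) -> None:
    new_dir = os.getcwd()  # hydra switches cwd to the run directory
    run(cfg, new_dir)

if __name__ == '__main__':
    main()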