def run(config):
    # build hooks
    hooks = build_hooks(config)

    # build model
    model = build_model(config, hooks)

    # load checkpoint
    checkpoint = config.checkpoint
    last_epoch, step = kvt.utils.load_checkpoint(model, None, checkpoint)
    print(f'last_epoch:{last_epoch}')

    # build datasets
    config.dataset.splits = [
        v for v in config.dataset.splits if v.split == config.inference.split
    ]
    dataloaders = build_dataloaders(config)
    dataloaders = [
        dataloader for dataloader in dataloaders
        if dataloader['split'] == config.inference.split
    ]
    assert len(dataloaders) == 1, f'len(dataloaders)({len(dataloaders)}) not 1'
    dataloader = dataloaders[0]

    model = model.cuda()
    if torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model)

    # inference loop
    inference(config=config, model=model, dataloader=dataloader, hooks=hooks)

def run(config):
    # overwrite path
    OmegaConf.set_struct(config, True)
    with open_dict(config):
        config.trainer.model.params.backbone.params.pretrained = False

    # build logger
    logger = build_logger(config)

    # logging for wandb or mlflow
    if hasattr(logger, "log_hyperparams"):
        for k, v in config.trainer.items():
            if k not in ("metrics", "inference"):
                logger.log_hyperparams(params=v)
        logger.log_hyperparams(params=config.dataset)
        logger.log_hyperparams(params=config.augmentation)

    # build dataloaders
    dataloaders = build_dataloaders(config)

    # build model
    model = build_model(config)

    # build hooks
    hooks = build_hooks(config)

    # build lightning module
    lightning_module = build_lightning_module(
        config,
        model=model,
        optimizer=None,
        scheduler=None,
        hooks=hooks,
        dataloaders=dataloaders,
        strong_transform=None,
    )

    # load best checkpoint
    dir_path = config.trainer.callbacks.ModelCheckpoint.dirpath
    filename = f"fold_{config.dataset.dataset.params.idx_fold}_best.ckpt"
    best_model_path = os.path.join(dir_path, filename)
    state_dict = torch.load(best_model_path)["state_dict"]

    # if using dp, it is necessary to fix state dict keys
    if (hasattr(config.trainer.trainer, "sync_batchnorm")
            and config.trainer.trainer.sync_batchnorm):
        state_dict = kvt.utils.fix_dp_model_state_dict(state_dict)
    lightning_module.model.load_state_dict(state_dict)

    # evaluate
    metric_dict = evaluate(lightning_module, hooks, config, mode=["validation"])
    print("Result:")
    print(metric_dict)

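# NOTE: `kvt.utils.fix_dp_model_state_dict` is used throughout these entry
# points but not defined here. A minimal sketch of its assumed behavior (the
# real kvt implementation may differ): DataParallel / sync-batchnorm wrapping
# prefixes every parameter name with "module.", so the prefix has to be
# stripped before the weights can be loaded into the bare model.
from collections import OrderedDict


def fix_dp_model_state_dict(state_dict):
    new_state_dict = OrderedDict()
    for key, value in state_dict.items():
        # e.g. "module.backbone.conv1.weight" -> "backbone.conv1.weight"
        if key.startswith("module."):
            key = key[len("module."):]
        new_state_dict[key] = value
    return new_state_dict
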
def run(config):
    # build hooks
    hooks = build_hooks(config)

    # build model
    model = build_model(config, hooks).cuda()

    # load checkpoint
    checkpoints = get_checkpoints(config)

    # build datasets
    config.dataset.splits = [
        v for v in config.dataset.splits
        if v.split == 'train' or v.split == 'train_all'
    ]
    print(config.dataset.splits)
    dataloaders = build_dataloaders(config)
    dataloaders = [
        dataloader for dataloader in dataloaders
        if dataloader['split'] == 'train' or dataloader['split'] == 'train_all'
    ]
    assert len(dataloaders) == 1, f'len(dataloaders)({len(dataloaders)}) not 1'
    dataloader = dataloaders[0]

    # average the checkpoints: start from the first, then fold in the rest
    kvt.utils.load_checkpoint(model, None, checkpoints[0])
    for i, checkpoint in enumerate(checkpoints[1:]):
        model2 = build_model(config, hooks).cuda()
        last_epoch, _ = kvt.utils.load_checkpoint(model2, None, checkpoint)
        if 'ema' in config.swa:
            moving_average(model, model2, config.swa.ema)
        else:
            # running average: the (i + 2)-th checkpoint gets weight 1 / (i + 2)
            moving_average(model, model2, 1. / (i + 2))

    # recompute batchnorm statistics on the training data
    with torch.no_grad():
        bn_update(dataloader, model)

    if 'ema' in config.swa:
        output_name = 'ema'
    else:
        output_name = 'swa'
    print('save {}'.format(output_name))
    kvt.utils.save_checkpoint(config, model, None, 0, 0, name=output_name,
                              weights_dict={'state_dict': model.state_dict()})

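# NOTE: `moving_average` and `bn_update` are not defined in this file. Below
# are minimal sketches of the standard stochastic weight averaging utilities
# they are assumed to correspond to; the project's actual implementations may
# differ (in particular the batch layout `data['x']` is an assumption).
def moving_average(net1, net2, alpha=1.0):
    # in-place update: net1 <- (1 - alpha) * net1 + alpha * net2
    for param1, param2 in zip(net1.parameters(), net2.parameters()):
        param1.data *= (1.0 - alpha)
        param1.data += param2.data * alpha


def bn_update(dataloader, model):
    # averaged weights invalidate stored batchnorm running statistics, so
    # reset them and recompute with a cumulative moving average (momentum=None)
    for module in model.modules():
        if isinstance(module, torch.nn.modules.batchnorm._BatchNorm):
            module.reset_running_stats()
            module.momentum = None
    model.train()
    for data in dataloader:
        model(data['x'].cuda())  # assumed batch layout; adjust to the dataset
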
def run(config):
    # build hooks
    loss_fn = build_loss(config)
    metric_fn = build_metrics(config)
    hooks = build_hooks(config)
    hooks.update({"loss_fn": loss_fn, "metric_fn": metric_fn})

    # build model
    model = build_model(config)

    # build callbacks
    callbacks = build_callbacks(config)

    # build logger
    logger = build_logger(config)

    # debug
    if config.debug:
        logger = None
        OmegaConf.set_struct(config, True)
        with open_dict(config):
            config.trainer.trainer.max_epochs = None
            config.trainer.trainer.max_steps = 10

    # logging for wandb or mlflow
    if hasattr(logger, "log_hyperparams"):
        for k, v in config.trainer.items():
            if k not in ("metrics", "inference"):
                logger.log_hyperparams(params=v)
        logger.log_hyperparams(params=config.dataset)
        logger.log_hyperparams(params=config.augmentation)

    # last linear training
    if (hasattr(config.trainer.model, "last_linear")
            and config.trainer.model.last_linear.training
            and config.trainer.model.params.pretrained):
        model = train_last_linear(config, model, hooks, logger)

    # initialize model
    model, params = kvt.utils.initialize_model(config, model)

    # build optimizer
    optimizer = build_optimizer(config, model=model, params=params)

    # build scheduler
    scheduler = build_scheduler(config, optimizer=optimizer)

    # build dataloaders
    dataloaders = build_dataloaders(config)

    # build strong transform
    strong_transform, strong_transform_p = build_strong_transform(config)

    # build lightning module
    lightning_module = build_lightning_module(
        config,
        model=model,
        optimizer=optimizer,
        scheduler=scheduler,
        hooks=hooks,
        dataloaders=dataloaders,
        strong_transform=strong_transform,
        strong_transform_p=strong_transform_p,
    )

    # build plugins
    # fix this issue:
    # https://github.com/PyTorchLightning/pytorch-lightning/discussions/6219
    plugins = []
    if hasattr(config.trainer.trainer, "accelerator") and (
            config.trainer.trainer.accelerator in ("ddp", "ddp2")):
        if hasattr(config.trainer, "find_unused_parameters"):
            plugins.append(
                DDPPlugin(
                    find_unused_parameters=config.trainer.find_unused_parameters))
        else:
            plugins.append(DDPPlugin(find_unused_parameters=False))

    # best model path
    dir_path = config.trainer.callbacks.ModelCheckpoint.dirpath
    if isinstance(OmegaConf.to_container(config.dataset.dataset), list):
        idx_fold = config.dataset.dataset[0].params.idx_fold
    else:
        idx_fold = config.dataset.dataset.params.idx_fold
    filename = f"fold_{idx_fold}_best.ckpt"
    best_model_path = os.path.join(dir_path, filename)

    # train loop
    trainer = pl.Trainer(
        logger=logger,
        callbacks=callbacks,
        plugins=plugins,
        **config.trainer.trainer,
    )
    if not config.trainer.skip_training:
        trainer.fit(lightning_module)
        path = trainer.checkpoint_callback.best_model_path
        if path:
            print(f"Best model: {path}")
            print("Renaming...")
            # copy best model
            subprocess.run(f"mv {path} {best_model_path}",
                           shell=True, stdout=PIPE, stderr=PIPE)
        # if there is no best_model_path, e.g. no valid dataloader
        else:
            print("Saving current trainer...")
            trainer.save_checkpoint(best_model_path)

    # log best model
    if hasattr(logger, "log_hyperparams"):
        logger.log_hyperparams(params={"best_model_path": best_model_path})

    # load best checkpoint
    if os.path.exists(best_model_path):
        print(f"Loading best model: {best_model_path}")
        state_dict = torch.load(best_model_path)["state_dict"]
        # if using dp, it is necessary to fix state dict keys
        if (hasattr(config.trainer.trainer, "sync_batchnorm")
                and config.trainer.trainer.sync_batchnorm):
            state_dict = kvt.utils.fix_dp_model_state_dict(state_dict)
        lightning_module.model.load_state_dict(state_dict)
    else:
        print(f"Best model {best_model_path} does not exist.")

    # evaluate
    metric_dict = evaluate(lightning_module, hooks, config, mode=["validation"])
    print("Result:")
    print(metric_dict)
    if hasattr(logger, "log_metrics"):
        logger.log_metrics(metric_dict)

def run(config):
    pl.seed_everything(config.seed)

    # overwrite path
    OmegaConf.set_struct(config, True)
    with open_dict(config):
        config.trainer.model.params.backbone.params.pretrained = False

    # build dataloaders
    dataloaders = build_short_audio_dataloaders(config)

    # build model
    model = build_model(config)

    # build hooks
    hooks = build_hooks(config)

    # build lightning module
    lightning_module = build_lightning_module(
        config,
        model=model,
        optimizer=None,
        scheduler=None,
        hooks=hooks,
        dataloaders=dataloaders,
        strong_transform=None,
    )

    # load best checkpoint
    dir_path = config.trainer.callbacks.ModelCheckpoint.dirpath
    filename = f"fold_{config.dataset.dataset.params.idx_fold}_best.ckpt"
    best_model_path = os.path.join(dir_path, filename)
    state_dict = torch.load(best_model_path)["state_dict"]

    # if using dp, it is necessary to fix state dict keys
    if (hasattr(config.trainer.trainer, "sync_batchnorm")
            and config.trainer.trainer.sync_batchnorm):
        state_dict = kvt.utils.fix_dp_model_state_dict(state_dict)
    lightning_module.model.load_state_dict(state_dict)

    # inference
    print("---------------------------------------------------------------")
    print("Inference")
    lightning_module.eval()
    lightning_module.cuda()

    secondwise_dirpath = os.path.join(config.trainer.evaluation.dirpath,
                                      "secondwise")
    clipwise_dirpath = os.path.join(config.trainer.evaluation.dirpath,
                                    "clipwise")

    with torch.no_grad():
        for dl_dict in lightning_module.dataloaders:
            dataloader, split = dl_dict["dataloader"], dl_dict["split"]
            batch_size = dataloader.batch_size
            total_size = len(dataloader.dataset)
            total_step = math.ceil(total_size / batch_size)
            tbar = tqdm(enumerate(dataloader), total=total_step)
            for i, data in tbar:
                x = data["x"].cuda()
                filenames = data["filename"]
                seconds = data["second"]
                outputs = lightning_module(x)

                # pool frame-wise logits down to one prediction per second:
                # framewise_logit is (batch, frames, classes), so max-pooling
                # the transposed tensor with kernel frames // CLIP_SECONDS
                # yields (batch, classes, CLIP_SECONDS)
                kernel_size = outputs["framewise_logit"].shape[1] // CLIP_SECONDS
                clip_wise_predictions = (
                    F.sigmoid(outputs["logit"]).detach().cpu().numpy())
                second_wise_predictions = (
                    F.sigmoid(
                        F.max_pool1d(
                            outputs["framewise_logit"].transpose(1, 2),
                            kernel_size=kernel_size,
                        )).detach().cpu().numpy())

                if not os.path.exists(secondwise_dirpath):
                    os.makedirs(secondwise_dirpath)
                if not os.path.exists(clipwise_dirpath):
                    os.makedirs(clipwise_dirpath)

                for filename, second, c_pred, s_pred in zip(
                        filenames, seconds, clip_wise_predictions,
                        second_wise_predictions):
                    c_path = os.path.join(
                        clipwise_dirpath,
                        f"{config.experiment_name}_{filename}_{second:0>5}.npy",
                    )
                    s_path = os.path.join(
                        secondwise_dirpath,
                        f"{config.experiment_name}_{filename}_{second:0>5}.npy",
                    )
                    np.save(c_path, c_pred)
                    np.save(s_path, s_pred)

def run(config):
    pl.seed_everything(config.seed)

    # overwrite path
    OmegaConf.set_struct(config, True)
    with open_dict(config):
        config.trainer.model.params.backbone.params.pretrained = False

    # build model
    model = build_model(config)

    # build hooks
    hooks = build_hooks(config)

    # build lightning module
    lightning_module = build_lightning_module(
        config,
        model=model,
        optimizer=None,
        scheduler=None,
        hooks=hooks,
        dataloaders=None,
        strong_transform=None,
    )

    # load best checkpoint
    dir_path = config.trainer.callbacks.ModelCheckpoint.dirpath
    if isinstance(OmegaConf.to_container(config.dataset.dataset), list):
        idx_fold = config.dataset.dataset[0].params.idx_fold
    else:
        idx_fold = config.dataset.dataset.params.idx_fold
    filename = f"fold_{idx_fold}_best.ckpt"
    best_model_path = os.path.join(dir_path, filename)
    state_dict = torch.load(best_model_path)["state_dict"]

    # if using dp, it is necessary to fix state dict keys
    if (hasattr(config.trainer.trainer, "sync_batchnorm")
            and config.trainer.trainer.sync_batchnorm):
        state_dict = kvt.utils.fix_dp_model_state_dict(state_dict)
    lightning_module.model.load_state_dict(state_dict)

    # inference
    columns = [
        f"pred_{i:03}" for i in range(config.trainer.model.params.num_classes)
    ]
    for i, (dataloaders, row_ids) in enumerate(build_test_dataloaders(config)):
        lightning_module.dataloaders = dataloaders
        _, output = evaluate(
            lightning_module,
            hooks,
            config,
            mode="test",
            return_predictions=True,
        )
        output = pd.DataFrame(output[0], columns=columns)
        output["row_id"] = row_ids

        # save predictions dataframe
        path = os.path.join(
            config.trainer.inference.dirpath,
            f"{i:03d}_" + config.trainer.inference.filename,
        )
        output.to_pickle(path)

def run(config):
    # prepare directories
    prepare_directories(config)

    # build hooks
    hooks = build_hooks(config)

    # build model
    model = build_model(config, hooks)

    # build loss
    loss = build_loss(config)
    loss_fn = hooks.loss_fn
    hooks.loss_fn = lambda **kwargs: loss_fn(loss_fn=loss, **kwargs)

    # build optimizer
    if 'no_bias_decay' in config.train and config.train.no_bias_decay:
        if 'encoder_lr_ratio' in config.train:
            encoder_lr_ratio = config.train.encoder_lr_ratio
            group_decay_encoder, group_no_decay_encoder = group_weight(model.encoder)
            group_decay_decoder, group_no_decay_decoder = group_weight(model.decoder)
            base_lr = config.optimizer.params.lr
            params = [
                {'params': group_decay_decoder},
                {'params': group_no_decay_decoder, 'weight_decay': 0.0},
                {'params': group_decay_encoder, 'lr': base_lr * encoder_lr_ratio},
                {'params': group_no_decay_encoder, 'lr': base_lr * encoder_lr_ratio,
                 'weight_decay': 0.0},
            ]
        else:
            group_decay, group_no_decay = group_weight(model)
            params = [
                {'params': group_decay},
                {'params': group_no_decay, 'weight_decay': 0.0},
            ]
    elif 'encoder_lr_ratio' in config.train:
        encoder_lr_ratio = config.train.encoder_lr_ratio
        base_lr = config.optimizer.params.lr
        params = [
            {'params': model.decoder.parameters()},
            {'params': model.encoder.parameters(), 'lr': base_lr * encoder_lr_ratio},
        ]
    else:
        params = model.parameters()
    optimizer = build_optimizer(config, params=params)

    model = model.cuda()

    # load checkpoint
    checkpoint = kvt.utils.get_initial_checkpoint(config)
    if checkpoint is not None:
        last_epoch, step = kvt.utils.load_checkpoint(model, optimizer, checkpoint)
        print('epoch, step:', last_epoch, step)
    else:
        last_epoch, step = -1, -1

    model, optimizer = to_data_parallel(config, model, optimizer)

    # build scheduler
    scheduler = build_scheduler(config, optimizer=optimizer, last_epoch=last_epoch)

    # build datasets
    dataloaders = build_dataloaders(config)

    # build summary writer
    writer = SummaryWriter(logdir=config.train.dir)
    logger_fn = hooks.logger_fn
    hooks.logger_fn = lambda **kwargs: logger_fn(writer=writer, **kwargs)

    # train loop
    train(config=config,
          model=model,
          optimizer=optimizer,
          scheduler=scheduler,
          dataloaders=dataloaders,
          hooks=hooks,
          last_epoch=last_epoch + 1)

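# NOTE: `group_weight` and `to_data_parallel` are not defined in this file.
# Minimal sketches of their assumed behavior follow; the project's actual
# implementations may differ. `group_weight` splits a module's parameters into
# a weight-decay group (conv/linear weights) and a no-decay group (biases and
# norm-layer scales), matching how the param groups above zero out decay.
def group_weight(module):
    group_decay, group_no_decay = [], []
    for name, param in module.named_parameters():
        if not param.requires_grad:
            continue
        # 1-d tensors cover biases and batchnorm/layernorm scale parameters
        if name.endswith('.bias') or param.ndim == 1:
            group_no_decay.append(param)
        else:
            group_decay.append(param)
    return group_decay, group_no_decay


def to_data_parallel(config, model, optimizer):
    # wrap the model in DataParallel when more than one GPU is available
    if torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model)
    return model, optimizer
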
def run(config):
    # build hooks
    hooks = build_hooks(config)

    # build model
    model = build_model(config, hooks)

    # load checkpoint
    checkpoint = config.checkpoint
    last_epoch, step = kvt.utils.load_checkpoint(model, None, checkpoint)
    print(f'last_epoch:{last_epoch}')

    model = model.cuda()
    if torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model)

    # build datasets
    config.dataset.splits = [
        v for v in config.dataset.splits if v.split == config.inference.split
    ]
    dataloaders = build_dataloaders(config)
    dataloaders = [
        dataloader for dataloader in dataloaders
        if dataloader['split'] == config.inference.split
    ]
    assert len(dataloaders) == 1, f'len(dataloaders)({len(dataloaders)}) not 1'
    dataloader = dataloaders[0]

    records = []

    # baseline score without test-time augmentation
    aggregated_outputs_base, aggregated_labels_base = evaluate(
        config=config, model=model, dataloader=dataloader, hooks=hooks)
    base_score = hooks.metric_fn(outputs=aggregated_outputs_base,
                                 labels=aggregated_labels_base,
                                 is_train=False)['score']
    print('base_score:', base_score)
    records.append(('base_score', base_score))

    # grid-search the augmentation limit, averaging over repeated runs
    config.transform.name = 'aug_search'
    REPEAT = 3
    # for limit in np.arange(0.1, 0.6, 0.1):
    # for limit in [90, 80, 70, 60]:
    for limit in [5, 10, 15, 20]:
        scores = []
        config.transform.params.limit = limit
        dataloaders = build_dataloaders(config)
        dataloaders = [
            dataloader for dataloader in dataloaders
            if dataloader['split'] == config.inference.split
        ]
        assert len(dataloaders) == 1, \
            f'len(dataloaders)({len(dataloaders)}) not 1'
        dataloader = dataloaders[0]
        for _ in range(REPEAT):
            agg_outputs_aug, _ = evaluate(config=config,
                                          model=model,
                                          dataloader=dataloader,
                                          hooks=hooks)
            # average the augmented predictions with the baseline predictions
            agg_outputs_ensemble = {}
            agg_outputs_ensemble['labels'] = aggregated_outputs_base['labels']
            agg_outputs_ensemble['probabilities'] = (
                aggregated_outputs_base['probabilities'] +
                agg_outputs_aug['probabilities']) / 2.0
            agg_outputs_ensemble['cls_probabilities'] = (
                aggregated_outputs_base['cls_probabilities'] +
                agg_outputs_aug['cls_probabilities']) / 2.0
            scores.append(
                hooks.metric_fn(outputs=agg_outputs_ensemble,
                                labels=aggregated_labels_base,
                                is_train=False)['score'])
        records.append((f'{limit:.02f}', sum(scores) / len(scores)))
        print(records[-1])

    df = pd.DataFrame.from_records(records, columns=['setting', 'score'])
    df.to_csv('aug.csv', index=False)

def run(config):
    pl.seed_everything(config.seed)

    # overwrite path
    OmegaConf.set_struct(config, True)
    with open_dict(config):
        config.trainer.model.params.backbone.params.pretrained = False

    # build logger
    logger = build_logger(config)

    # logging for wandb or mlflow
    if hasattr(logger, "log_hyperparams"):
        for k, v in config.trainer.items():
            if k not in ("metrics", "inference"):
                logger.log_hyperparams(params=v)
        logger.log_hyperparams(params=config.dataset)
        logger.log_hyperparams(params=config.augmentation)

    # build model
    model = build_model(config)

    # build hooks
    hooks = build_hooks(config)

    # build lightning module
    lightning_module = build_lightning_module(
        config,
        model=model,
        optimizer=None,
        scheduler=None,
        hooks=hooks,
        dataloaders=None,
        strong_transform=None,
    )

    # load best checkpoint
    dir_path = config.trainer.callbacks.ModelCheckpoint.dirpath
    if isinstance(OmegaConf.to_container(config.dataset.dataset), list):
        idx_fold = config.dataset.dataset[0].params.idx_fold
    else:
        idx_fold = config.dataset.dataset.params.idx_fold
    filename = f"fold_{idx_fold}_best.ckpt"
    best_model_path = os.path.join(dir_path, filename)
    state_dict = torch.load(best_model_path)["state_dict"]

    # if using dp, it is necessary to fix state dict keys
    if (
        hasattr(config.trainer.trainer, "sync_batchnorm")
        and config.trainer.trainer.sync_batchnorm
    ):
        state_dict = kvt.utils.fix_dp_model_state_dict(state_dict)
    lightning_module.model.load_state_dict(state_dict)

    # inference
    columns = [f"pred_{i:03}" for i in range(config.trainer.model.params.num_classes)]
    outputs = []
    for i, (dataloaders, row_ids) in enumerate(build_test_dataloaders(config)):
        lightning_module.dataloaders = dataloaders
        _, output = evaluate(
            lightning_module,
            hooks,
            config,
            mode="test",
            return_predictions=True,
            tta=config.augmentation.tta,
        )
        output = pd.DataFrame(output[0], columns=columns)
        output["row_id"] = row_ids
        outputs.append(output)
    outputs = pd.concat(outputs).reset_index(drop=True)

    # save predictions dataframe
    path = os.path.join(
        config.trainer.inference.dirpath,
        config.trainer.inference.filename,
    )
    outputs.to_pickle(path)

    # merge labels
    labels = pd.read_csv(f"{config.input_dir}/train_soundscape_labels.csv")
    labels["birds"] = labels["birds"].apply(lambda x: x.split())
    df = labels.merge(outputs)

    # target
    target = (
        df["birds"]
        .apply(lambda x: preprocess_target(x, config.competition.target_unique_values))
        .values
    )

    # evaluate at two confidence thresholds
    result = {}
    pred = df[columns].values[:, :NUM_CLASSES] > 0.5
    for func in [f1_score, precision_score, recall_score]:
        result[f"train_soundscapes_{func.__name__}_50"] = func(
            target, pred, average="samples", zero_division=1
        )
    pred = df[columns].values[:, :NUM_CLASSES] > 0.25
    for func in [f1_score, precision_score, recall_score]:
        result[f"train_soundscapes_{func.__name__}_25"] = func(
            target, pred, average="samples", zero_division=1
        )
    print("Result:")
    print(result)

    # log
    if logger is not None:
        logger.log_metrics(result)
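# NOTE: `preprocess_target` is not defined in this file. A plausible sketch of
# its assumed behavior, given the sample-averaged sklearn metrics above: turn
# the space-separated bird codes of one soundscape row into a multi-hot vector
# over the competition's label vocabulary ("nocall" rows map to all zeros).
def preprocess_target(birds, target_unique_values):
    target = np.zeros(len(target_unique_values), dtype=int)
    for bird in birds:
        if bird in target_unique_values:
            target[list(target_unique_values).index(bird)] = 1
    return target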