import logging

from src.arguments import parser, args
from src.dataset import get_datasets
from src.io.load import load_songs_from_csv, load_songs_from_folder
from src.constants import DEFAULT_OUTPUT
from src.io.save import save_output
from src.refactor import refactor

if __name__ == '__main__':
    # Set Logging
    logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s', level=logging.INFO)

    # Download datasets
    if args.get_datasets_switch:
        get_datasets()

    # Get input args and show help
    if args.files_songs is not None:
        input_files = load_songs_from_csv(args.files_songs)
    elif args.music_folder is not None:
        input_files = load_songs_from_folder(args.music_folder)
    else:
        parser.print_help()
        exit()

    # Get output args
    output_file = args.output if args.output is not None else DEFAULT_OUTPUT

    # Run refactor recommendation and save results
    save_output(refactor(input_files), output_file)
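# ---------------------------------------------------------------------------
# For context: a minimal sketch of what src/arguments.py could provide so that
# the entry point above works. Flag names, help texts, and defaults below are
# assumptions for illustration, not the project's actual parser; only the
# attribute names (get_datasets_switch, files_songs, music_folder, output) are
# taken from the script above.
# ---------------------------------------------------------------------------
import argparse

parser = argparse.ArgumentParser(description="Playlist refactor recommendation")
parser.add_argument("--get-datasets", dest="get_datasets_switch", action="store_true",
                    help="download the required datasets before running")
parser.add_argument("--files-songs", dest="files_songs", default=None,
                    help="CSV file listing the input songs")
parser.add_argument("--music-folder", dest="music_folder", default=None,
                    help="folder containing the input songs")
parser.add_argument("--output", default=None,
                    help="output file (falls back to DEFAULT_OUTPUT)")
args = parser.parse_args()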
def main(args):
    # setup
    random.seed(args.seed)
    ngpus = torch.cuda.device_count()
    if ngpus == 0:
        raise RuntimeWarning("This cannot run on a CPU-only machine")
    print(f"Working with {ngpus} GPUs")
    print(args.config)

    current_experiment_time = datetime.now().strftime('%Y%m%d_%T').replace(":", "")
    save_folder = pathlib.Path(f"./preds/{current_experiment_time}")
    save_folder.mkdir(parents=True, exist_ok=True)

    with (save_folder / 'args.txt').open('w') as f:
        print(vars(args), file=f)

    # Reload the training-time arguments of every model in the ensemble
    args_list = []
    for config in args.config:
        config_file = pathlib.Path(config).resolve()
        print(config_file)
        ckpt = config_file.with_name("model_best.pth.tar")
        with config_file.open("r") as file:
            old_args = yaml.safe_load(file)
            old_args = SimpleNamespace(**old_args, ckpt=ckpt)
            # set default normalisation
            if not hasattr(old_args, "normalisation"):
                old_args.normalisation = "minmax"
        print(old_args)
        args_list.append(old_args)

    if args.on == "test":
        args.pred_folder = save_folder / f"test_segs_tta{args.tta}"
    elif args.on == "val":
        args.pred_folder = save_folder / f"validation_segs_tta{args.tta}"
    else:
        args.pred_folder = save_folder / f"training_segs_tta{args.tta}"
    args.pred_folder.mkdir(exist_ok=True)

    # Create one model per config file and reload its best checkpoint
    models_list = []
    normalisations_list = []
    for model_args in args_list:
        print(model_args.arch)
        model_maker = getattr(models, model_args.arch)

        model = model_maker(
            4, 3,
            width=model_args.width, deep_supervision=model_args.deep_sup,
            norm_layer=get_norm_layer(model_args.norm_layer), dropout=model_args.dropout)
        print(f"Creating {model_args.arch}")

        reload_ckpt_bis(str(model_args.ckpt), model)
        models_list.append(model)
        normalisations_list.append(model_args.normalisation)
        print("reload best weights")
        print(model)

    dataset_minmax = get_datasets(args.seed, False, no_seg=True, on=args.on, normalisation="minmax")
    dataset_zscore = get_datasets(args.seed, False, no_seg=True, on=args.on, normalisation="zscore")

    loader_minmax = torch.utils.data.DataLoader(dataset_minmax, batch_size=1, num_workers=2)
    loader_zscore = torch.utils.data.DataLoader(dataset_zscore, batch_size=1, num_workers=2)

    print("Val dataset number of batch:", len(loader_minmax))
    generate_segmentations((loader_minmax, loader_zscore), models_list, normalisations_list, args)
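# ---------------------------------------------------------------------------
# A hypothetical entry point for the inference main() above, shown only to
# make the expected arguments explicit. The flag names and defaults are
# assumptions inferred from the attributes main() reads (config, on, tta,
# seed); they are not guaranteed to match the project's real parser.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description="Ensemble inference with optional test-time augmentation")
    parser.add_argument("--config", nargs="+", required=True,
                        help="YAML config file(s); a model_best.pth.tar is expected next to each one")
    parser.add_argument("--on", default="val", choices=["train", "val", "test"],
                        help="which split to generate segmentations for")
    parser.add_argument("--tta", action="store_true",
                        help="enable test-time augmentation")
    parser.add_argument("--seed", type=int, default=0)
    main(parser.parse_args())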
def main(args):
    """The main training function.

    Only works for single node (be it single or multi-GPU).

    Parameters
    ----------
    args :
        Parsed arguments
    """
    # setup
    ngpus = torch.cuda.device_count()
    if ngpus == 0:
        raise RuntimeWarning("This cannot run on a CPU-only machine")
    print(f"Working with {ngpus} GPUs")

    if args.optim.lower() == "ranger":
        # No warm up if ranger optimizer
        args.warm = 0

    current_experiment_time = datetime.now().strftime('%Y%m%d_%T').replace(":", "")
    args.exp_name = f"{'debug_' if args.debug else ''}{current_experiment_time}_" \
                    f"_fold{args.fold if not args.full else 'FULL'}" \
                    f"_{args.arch}_{args.width}" \
                    f"_batch{args.batch_size}" \
                    f"_optim{args.optim}" \
                    f"_lr{args.lr}-wd{args.weight_decay}_epochs{args.epochs}_deepsup{args.deep_sup}" \
                    f"_{'fp16' if not args.no_fp16 else 'fp32'}" \
                    f"_warm{args.warm}_" \
                    f"_norm{args.norm_layer}{'_swa' + str(args.swa_repeat) if args.swa else ''}" \
                    f"_dropout{args.dropout}" \
                    f"_warm_restart{args.warm_restart}" \
                    f"{'_' + args.com.replace(' ', '_') if args.com else ''}"

    args.save_folder = pathlib.Path(f"./runs/{args.exp_name}")
    args.save_folder.mkdir(parents=True, exist_ok=True)
    args.seg_folder = args.save_folder / "segs"
    args.seg_folder.mkdir(parents=True, exist_ok=True)
    args.save_folder = args.save_folder.resolve()
    save_args(args)
    t_writer = SummaryWriter(str(args.save_folder))

    # Create model
    print(f"Creating {args.arch}")
    model_maker = getattr(models, args.arch)

    model = model_maker(
        4, 3,
        width=args.width, deep_supervision=args.deep_sup,
        norm_layer=get_norm_layer(args.norm_layer), dropout=args.dropout)

    print(f"total number of trainable parameters {count_parameters(model)}")

    if args.swa:
        # Create the average model
        swa_model = model_maker(
            4, 3,
            width=args.width, deep_supervision=args.deep_sup,
            norm_layer=get_norm_layer(args.norm_layer))
        for param in swa_model.parameters():
            param.detach_()
        swa_model = swa_model.cuda()
        swa_model_optim = WeightSWA(swa_model)

    if ngpus > 1:
        model = torch.nn.DataParallel(model).cuda()
    else:
        model = model.cuda()
    print(model)
    model_file = args.save_folder / "model.txt"
    with model_file.open("w") as f:
        print(model, file=f)

    criterion = EDiceLoss().cuda()
    metric = criterion.metric
    print(metric)

    rangered = False  # needed because LR scheduling scheme is different for this optimizer
    if args.optim == "adam":
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay, eps=1e-4)
    elif args.optim == "sgd":
        optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, weight_decay=args.weight_decay,
                                    momentum=0.9, nesterov=True)
    elif args.optim == "adamw":
        print("weight decay argument will not be used. Default is 1e-2")
        optimizer = torch.optim.AdamW(model.parameters(), lr=args.lr)
    elif args.optim == "ranger":
        optimizer = Ranger(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
        rangered = True

    # optionally resume from a checkpoint
    if args.resume:
        reload_ckpt(args, model, optimizer)

    if args.debug:
        args.epochs = 2
        args.warm = 0
        args.val = 1

    if args.full:
        train_dataset, bench_dataset = get_datasets(args.seed, args.debug, full=True)

        train_loader = torch.utils.data.DataLoader(
            train_dataset, batch_size=args.batch_size, shuffle=True,
            num_workers=args.workers, pin_memory=False, drop_last=True)

        bench_loader = torch.utils.data.DataLoader(
            bench_dataset, batch_size=1, num_workers=args.workers)

    else:
        train_dataset, val_dataset, bench_dataset = get_datasets(args.seed, args.debug, fold_number=args.fold)

        train_loader = torch.utils.data.DataLoader(
            train_dataset, batch_size=args.batch_size, shuffle=True,
            num_workers=args.workers, pin_memory=False, drop_last=True)

        val_loader = torch.utils.data.DataLoader(
            val_dataset, batch_size=max(1, args.batch_size // 2), shuffle=False,
            pin_memory=False, num_workers=args.workers, collate_fn=determinist_collate)

        bench_loader = torch.utils.data.DataLoader(
            bench_dataset, batch_size=1, num_workers=args.workers)

        print("Val dataset number of batch:", len(val_loader))

    print("Train dataset number of batch:", len(train_loader))

    # create grad scaler
    scaler = GradScaler()

    # Actual Train loop
    best = np.inf
    print("start warm-up now!")
    if args.warm != 0:
        tot_iter_train = len(train_loader)
        scheduler = torch.optim.lr_scheduler.LambdaLR(
            optimizer, lambda cur_iter: (1 + cur_iter) / (tot_iter_train * args.warm))

    patients_perf = []

    if not args.resume:
        for epoch in range(args.warm):
            ts = time.perf_counter()
            model.train()
            training_loss = step(train_loader, model, criterion, metric, args.deep_sup, optimizer, epoch, t_writer,
                                 scaler, scheduler, save_folder=args.save_folder,
                                 no_fp16=args.no_fp16, patients_perf=patients_perf)
            te = time.perf_counter()
            print(f"Train Epoch done in {te - ts} s")

            # Validate at the end of epoch every val step
            if (epoch + 1) % args.val == 0 and not args.full:
                model.eval()
                with torch.no_grad():
                    validation_loss = step(val_loader, model, criterion, metric, args.deep_sup, optimizer, epoch,
                                           t_writer, save_folder=args.save_folder, no_fp16=args.no_fp16)

                t_writer.add_scalar(f"SummaryLoss/overfit", validation_loss - training_loss, epoch)

    if args.warm_restart:
        print('Total number of epochs should be divisible by 30, else it will do odd things')
        scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, 30, eta_min=1e-7)
    else:
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer, args.epochs + 30 if not rangered else round(args.epochs * 0.5))

    print("start training now!")
    if args.swa:
        # c = 15, k=3, repeat = 5
        c, k, repeat = 30, 3, args.swa_repeat
        epochs_done = args.epochs
        reboot_lr = 0
        if args.debug:
            c, k, repeat = 2, 1, 2

    for epoch in range(args.start_epoch + args.warm, args.epochs + args.warm):
        try:
            # do_epoch for one epoch
            ts = time.perf_counter()
            model.train()
            training_loss = step(train_loader, model, criterion, metric, args.deep_sup, optimizer, epoch, t_writer,
                                 scaler, save_folder=args.save_folder,
                                 no_fp16=args.no_fp16, patients_perf=patients_perf)
            te = time.perf_counter()
            print(f"Train Epoch done in {te - ts} s")

            # Validate at the end of epoch every val step
            if (epoch + 1) % args.val == 0 and not args.full:
                model.eval()
                with torch.no_grad():
                    validation_loss = step(val_loader, model, criterion, metric, args.deep_sup, optimizer, epoch,
                                           t_writer, save_folder=args.save_folder, no_fp16=args.no_fp16,
                                           patients_perf=patients_perf)

                t_writer.add_scalar(f"SummaryLoss/overfit", validation_loss - training_loss, epoch)

                if validation_loss < best:
                    best = validation_loss
                    model_dict = model.state_dict()
                    save_checkpoint(
                        dict(
                            epoch=epoch, arch=args.arch,
                            state_dict=model_dict,
                            optimizer=optimizer.state_dict(),
                            scheduler=scheduler.state_dict(),
                        ),
                        save_folder=args.save_folder)

                ts = time.perf_counter()
                print(f"Val epoch done in {ts - te} s")

            if args.swa:
                if (args.epochs - epoch - c) == 0:
                    reboot_lr = optimizer.param_groups[0]['lr']

            if not rangered:
                scheduler.step()
                print("scheduler stepped!")
            else:
                if epoch / args.epochs > 0.5:
                    scheduler.step()
                    print("scheduler stepped!")

        except KeyboardInterrupt:
            print("Stopping training loop, doing benchmark")
            break

    if args.swa:
        swa_model_optim.update(model)
        print("SWA Model initialised!")
        for i in range(repeat):
            optimizer = torch.optim.Adam(model.parameters(), args.lr / 2, weight_decay=args.weight_decay)
            scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, c + 10)
            for swa_epoch in range(c):
                # do_epoch for one epoch
                ts = time.perf_counter()
                model.train()
                swa_model.train()
                current_epoch = epochs_done + i * c + swa_epoch
                training_loss = step(train_loader, model, criterion, metric, args.deep_sup, optimizer,
                                     current_epoch, t_writer,
                                     scaler, no_fp16=args.no_fp16, patients_perf=patients_perf)
                te = time.perf_counter()
                print(f"Train Epoch done in {te - ts} s")

                t_writer.add_scalar(f"SummaryLoss/train", training_loss, current_epoch)

                # update every k epochs and val:
                print(f"cycle number: {i}, swa_epoch: {swa_epoch}, total_cycle_to_do {repeat}")
                if (swa_epoch + 1) % k == 0:
                    swa_model_optim.update(model)
                    if not args.full:
                        model.eval()
                        swa_model.eval()
                        with torch.no_grad():
                            validation_loss = step(val_loader, model, criterion, metric, args.deep_sup, optimizer,
                                                   current_epoch, t_writer,
                                                   save_folder=args.save_folder, no_fp16=args.no_fp16)
                            swa_model_loss = step(val_loader, swa_model, criterion, metric, args.deep_sup, optimizer,
                                                  current_epoch, t_writer, swa=True,
                                                  save_folder=args.save_folder, no_fp16=args.no_fp16)

                        t_writer.add_scalar(f"SummaryLoss/val", validation_loss, current_epoch)
                        t_writer.add_scalar(f"SummaryLoss/swa", swa_model_loss, current_epoch)
                        t_writer.add_scalar(f"SummaryLoss/overfit", validation_loss - training_loss, current_epoch)
                        t_writer.add_scalar(f"SummaryLoss/overfit_swa", swa_model_loss - training_loss, current_epoch)
                scheduler.step()

        epochs_added = c * repeat
        save_checkpoint(
            dict(
                epoch=args.epochs + epochs_added, arch=args.arch,
                state_dict=swa_model.state_dict(),
                optimizer=optimizer.state_dict()
            ),
            save_folder=args.save_folder)
    else:
        save_checkpoint(
            dict(
                epoch=args.epochs, arch=args.arch,
                state_dict=model.state_dict(),
                optimizer=optimizer.state_dict()
            ),
            save_folder=args.save_folder)

    try:
        df_individual_perf = pd.DataFrame.from_records(patients_perf)
        print(df_individual_perf)
        df_individual_perf.to_csv(f'{str(args.save_folder)}/patients_indiv_perf.csv')
        reload_ckpt_bis(f'{str(args.save_folder)}/model_best.pth.tar', model)
        generate_segmentations(bench_loader, model, t_writer, args)
    except KeyboardInterrupt:
        print("Stopping right now!")
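# ---------------------------------------------------------------------------
# WeightSWA is imported from elsewhere in the repo and is not shown in this
# section. As a rough sketch of the technique it implements (stochastic weight
# averaging), an updater of this kind keeps a cumulative moving average of the
# trained model's weights. The class below is an illustrative assumption, not
# the repo's actual helper.
# ---------------------------------------------------------------------------
class WeightSWASketch:
    """Cumulative moving average of model weights (SWA-style)."""

    def __init__(self, swa_model):
        self.num_updates = 0        # how many snapshots have been averaged so far
        self.swa_model = swa_model  # model holding the averaged weights

    def update(self, student):
        # swa <- swa * (1 - 1/n) + student * 1/n, i.e. a running mean over snapshots
        self.num_updates += 1
        beta = 1.0 / self.num_updates
        with torch.no_grad():
            for swa_p, src_p in zip(self.swa_model.parameters(), student.parameters()):
                swa_p.mul_(1.0 - beta).add_(src_p, alpha=beta)

    def reset(self):
        self.num_updates = 0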