def run_me(rank=0, Sgrid=[1], Ngrid=[3], nNgrid=1, Ggrid=[25.], nGgrid=1, ngpus=3, idx=0):
    """Train and evaluate the unsupervised burst-STN denoiser.

    Builds a config from the grid indices, loads model/data, runs the train
    loop with periodic saves, and writes the best test PSNR to results.csv.

    :param rank: process rank; combined with `idx` to pick a grid point.
    :param Sgrid/Ngrid/Ggrid: grids for dataset size, frame count, noise std.
    :param nNgrid/nGgrid: grid lengths used to decode `grid_idx`.
    :param ngpus: gpus available (rank -> gpu mapping; overridden below).
    :param idx: grid offset multiplier.
    """
    args = get_args()
    args.name = "default"
    cfg = get_cfg(args)
    cfg.use_ddp = False
    cfg.use_apex = False
    gpuid = rank % ngpus  # set gpuid from rank ...
    gpuid = 1             # ... but pinned to gpu 1 for this experiment
    cfg.gpuid = gpuid
    cfg.device = f"cuda:{gpuid}"

    # -- decode the flat grid index into per-dimension indices --
    grid_idx = idx * (1 * ngpus) + rank
    B_grid_idx = (grid_idx % 2)
    N_grid_idx = (grid_idx // 2) % nNgrid
    G_grid_idx = grid_idx // (nNgrid * 2) % nGgrid
    S_grid_idx = grid_idx // (nGgrid * nNgrid * 2)

    # -- force blind --
    B_grid_idx = 0

    # -- config settings --
    cfg.use_collate = True
    cfg.S = Sgrid[S_grid_idx]
    cfg.dataset.name = "voc"
    cfg.supervised = False
    cfg.blind = (B_grid_idx == 0)
    # FIX: was `~cfg.supervised` — bitwise invert of a bool (-1 or -2) is
    # always truthy, so blind would be "True" even when supervised.
    cfg.blind = not cfg.supervised
    cfg.N = Ngrid[N_grid_idx]
    cfg.N = 5  # override: fixed burst size for this run

    # -- kpn params --
    cfg.kpn_filter_onehot = False
    cfg.kpn_1f_frame_size = 6
    cfg.kpn_frame_size = 6
    cfg.kpn_cascade = False
    cfg.kpn_cascade_num = 1

    cfg.dynamic.frames = cfg.N
    cfg.noise_type = 'g'
    cfg.noise_params['g']['stddev'] = Ggrid[G_grid_idx]
    noise_level = Ggrid[G_grid_idx]
    cfg.batch_size = 4
    cfg.init_lr = 5e-5
    cfg.unet_channels = 3
    cfg.input_N = cfg.N - 1
    cfg.epochs = 300
    cfg.color_cat = True
    cfg.log_interval = 30  # int(int(50000 / cfg.batch_size) / 500)
    cfg.save_interval = 3
    cfg.dynamic.bool = True
    cfg.dynamic.ppf = 2
    cfg.dynamic.random_eraser = False
    cfg.dynamic.frame_size = 128
    cfg.dynamic.total_pixels = cfg.dynamic.ppf * cfg.N

    # -- load previous experiment --
    cfg.load_epoch = 0
    cfg.load = cfg.load_epoch > 0
    cfg.restart_after_load = True

    # -- experiment info --
    name = "burst_stn"
    sup_str = "sup" if cfg.supervised else "unsup"
    bs_str = "b{}".format(cfg.batch_size)
    kpn_cascade_str = "cascade{}".format(
        cfg.kpn_cascade_num) if cfg.kpn_cascade else "noCascade"
    frame_str = "n{}".format(cfg.N)
    framesize_str = "f{}".format(cfg.dynamic.frame_size)
    filtersize_str = "filterSized{}".format(cfg.kpn_frame_size)
    misc = "kpn_klLoss_annealMSE_noalignkpn"
    cfg.exp_name = f"{sup_str}_{name}_{kpn_cascade_str}_{bs_str}_{frame_str}_{framesize_str}_{filtersize_str}_{misc}"
    print(f"Experiment name: {cfg.exp_name}")
    cfg.desc = "Desc: unsup, frames {}, cascade {}, framesize {}, filter size {}, lr {}, kl loss, anneal mse".format(
        frame_str, kpn_cascade_str, framesize_str, filtersize_str, cfg.init_lr)
    print(f"Description: [{cfg.desc}]")

    # -- attn params --
    cfg.patch_sizes = [128, 128]
    cfg.d_model_attn = 3

    cfg.input_noise = False
    cfg.input_noise_middle_only = False
    cfg.input_with_middle_frame = True
    cfg.middle_frame_random_erase = False
    cfg.input_noise_level = noise_level / 255.
    if (cfg.blind == 0):  # e.g. supervised is true
        cfg.input_with_middle_frame = True
    if cfg.input_with_middle_frame:
        cfg.input_N = cfg.N

    blind = "blind" if cfg.blind else "nonblind"
    print(grid_idx, blind, cfg.N, Ggrid[G_grid_idx], gpuid)

    # -- output/model paths encode the full experiment configuration --
    dynamic_str = "dynamic_input_noise" if cfg.input_noise else "dynamic"
    if cfg.input_noise_middle_only:
        dynamic_str += "_mo"
    if cfg.input_with_middle_frame:
        dynamic_str += "_wmf"
    postfix = Path(
        f"./modelBurst/{cfg.exp_name}/{dynamic_str}/{cfg.dynamic.frame_size}_{cfg.dynamic.ppf}_{cfg.dynamic.total_pixels}/{cfg.S}/{blind}/{cfg.N}/{noise_level}/"
    )
    print(postfix, cfg.dynamic.total_pixels)
    cfg.model_path = cfg.model_path / postfix
    cfg.optim_path = cfg.optim_path / postfix
    if not cfg.model_path.exists():
        cfg.model_path.mkdir(parents=True)
    if not cfg.optim_path.exists():
        cfg.optim_path.mkdir(parents=True)

    checkpoint = cfg.model_path / Path("checkpoint_{}.tar".format(cfg.epochs))
    print("N: {} | Noise Level: {}".format(cfg.N, cfg.noise_params['g']['stddev']))
    torch.cuda.set_device(gpuid)

    # -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
    #    Load the Model, Data, Optim, Crit
    # -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
    model, noise_critic, criterion = load_burst_stn_model(cfg)
    nparams = count_parameters(model)
    print("Number of Trainable Parameters: {}".format(nparams))
    print("PID: {}".format(os.getpid()))

    # -- load data --
    data, loader = load_dataset(cfg, 'dynamic')

    # -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
    #    Load the Model from Memory
    # -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
    if cfg.load:
        name = "denoiser"
        fp = cfg.model_path / Path("{}/checkpoint_{}.tar".format(
            name, cfg.load_epoch))
        # NOTE(review): hard-coded absolute path overrides the computed one.
        fp = Path(
            "/home/gauenk/Documents/experiments/cl_gen/output/n2n_wl/cifar10/default/model/modelBurst/unsup_burst_noCascade_b4_n10_f128_filterSized12_kpn_klLoss_annealMSE_klPRes/dynamic_wmf/128_1_10/1/blind/10/25.0/denoiser/checkpoint_{}.tar"
            .format(cfg.load_epoch))
        model.denoiser_info.model = load_model_fp(cfg, model.denoiser_info.model,
                                                  fp, cfg.gpuid)
        if cfg.restart_after_load:
            cfg.current_epoch = 0
            cfg.global_step = 0
        else:
            cfg.current_epoch = cfg.load_epoch + 1
            # FIX: was `len(train_data)` (undefined); use the loaded train set.
            cfg.global_step = cfg.load_epoch * len(data.tr)
    else:
        cfg.current_epoch = 0

    # -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
    #       Pre train-loop setup
    # -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
    te_ave_psnr = {}
    test_before = False
    if test_before:
        ave_psnr, _ = test_loop_burst(cfg, model, criterion, loader.te, -1)
        print("PSNR before training {:2.3e}".format(ave_psnr))
        return
    if checkpoint.exists() and cfg.load:
        model = load_model_fp(cfg, model, checkpoint, gpuid)
        print("Loaded model.")
        cfg.current_epoch = cfg.epochs + 1
        # FIX: was `len(train_data)` (undefined).
        cfg.global_step = len(data.tr) * cfg.epochs

    record_losses = pd.DataFrame({
        'kpn': [],
        'ot': [],
        'psnr': [],
        'psnr_std': []
    })
    use_record = False
    loss_type = "sup_r_ot"

    # -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
    #           Training Loop
    # -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
    for epoch in range(cfg.current_epoch, cfg.epochs):
        print(cfg.desc)
        sys.stdout.flush()
        losses, record_losses = train_loop(cfg, model, loader.tr, epoch,
                                           record_losses)
        if use_record:
            write_record_losses_file(cfg.current_epoch, postfix, loss_type,
                                     record_losses)
        cfg.current_epoch += 1
        if epoch % cfg.save_interval == 0:
            save_burst_model(cfg, "align", model.align_info.model,
                             model.align_info.optim)
            save_burst_model(cfg, "denoiser", model.denoiser_info.model,
                             model.denoiser_info.optim)
            save_burst_model(cfg, "critic", noise_critic.disc,
                             noise_critic.optim)
        ave_psnr, record_test = test_loop(cfg, model, loader.te, epoch)
        if use_record:
            write_record_test_file(cfg.current_epoch, postfix, loss_type,
                                   record_test)
        te_ave_psnr[epoch] = ave_psnr

    epochs, psnr = zip(*te_ave_psnr.items())
    best_index = np.argmax(psnr)
    best_epoch, best_psnr = epochs[best_index], psnr[best_index]
    print(
        f"Best Epoch {best_epoch} | Best PSNR {best_psnr} | N: {cfg.N} | Blind: {blind}"
    )

    root = Path(f"{settings.ROOT_PATH}/output/n2nwl/{postfix}/")
    fn = Path(f"results.csv")
    if not root.exists():
        root.mkdir(parents=True)
    path = root / fn
    with open(path, 'w') as f:
        f.write("{:d},{:d},{:2.10e},{:d}\n".format(cfg.N, best_epoch,
                                                   best_psnr, nparams))
    # FIX: original called `save_model(cfg, model, optimizer)` but no
    # `optimizer` exists in this function (the burst model carries its own
    # per-component optims) — save the components instead.
    save_burst_model(cfg, "align", model.align_info.model, model.align_info.optim)
    save_burst_model(cfg, "denoiser", model.denoiser_info.model,
                     model.denoiser_info.optim)
    save_burst_model(cfg, "critic", noise_critic.disc, noise_critic.optim)
def run_me(rank=0, Sgrid=[1], Ngrid=[8], nNgrid=1, Ggrid=[25.], nGgrid=1, ngpus=3, idx=0):
    """Run the DIP (deep image prior) experiment over a dynamic-noise dataset.

    Decodes a grid point from `rank`/`idx`, builds the config, loads the attn
    model and data, then runs `dip_loop` per epoch and writes the best PSNR
    to results.csv.
    """
    args = get_args()
    args.name = "default"
    cfg = get_cfg(args)
    cfg.use_ddp = False
    cfg.use_apex = False
    gpuid = rank % ngpus  # set gpuid from rank ...
    gpuid = 0             # ... but pinned to gpu 0 here
    cfg.gpuid = gpuid
    cfg.device = f"cuda:{gpuid}"

    # -- decode the flat grid index into per-dimension indices --
    grid_idx = idx * (1 * ngpus) + rank
    B_grid_idx = (grid_idx % 2)
    N_grid_idx = (grid_idx // 2) % nNgrid
    G_grid_idx = grid_idx // (nNgrid * 2) % nGgrid
    S_grid_idx = grid_idx // (nGgrid * nNgrid * 2)

    # -- force blind --
    B_grid_idx = 0

    # -- attn parameters --
    cfg.patch_sizes = [32, 32]
    cfg.d_model_attn = 512

    # -- config settings --
    cfg.use_collate = True
    cfg.S = Sgrid[S_grid_idx]
    cfg.dataset.name = "voc"
    cfg.blind = (B_grid_idx == 0)
    cfg.N = Ngrid[N_grid_idx]
    cfg.N = 3  # override: fixed burst size
    cfg.dynamic.frames = cfg.N
    cfg.noise_type = 'g'
    cfg.noise_params['g']['stddev'] = Ggrid[G_grid_idx]
    noise_level = Ggrid[G_grid_idx]
    cfg.batch_size = 1
    cfg.init_lr = 1e-3
    cfg.unet_channels = 3
    cfg.input_N = cfg.N - 1
    cfg.epochs = 30
    cfg.color_cat = True
    cfg.log_interval = int(int(50000 / cfg.batch_size) / 100)
    cfg.dynamic.bool = True
    cfg.dynamic.ppf = 2
    cfg.dynamic.random_eraser = False
    cfg.dynamic.frame_size = 32
    # cfg.dynamic.total_pixels = cfg.dynamic.ppf * cfg.N
    cfg.dynamic.total_pixels = 2 * cfg.N
    cfg.load = False

    cfg.input_noise = False
    cfg.middle_frame_random_erase = False
    cfg.input_noise_level = noise_level / 255.
    cfg.input_with_middle_frame = True
    if (cfg.blind == 0):  # e.g. supervised is true
        cfg.input_with_middle_frame = True
    if cfg.input_with_middle_frame:
        cfg.input_N = cfg.N

    blind = "blind" if cfg.blind else "nonblind"
    print(grid_idx, blind, cfg.N, Ggrid[G_grid_idx], gpuid)

    # -- output paths encode the experiment settings --
    dynamic_str = "dynamic_input_noise" if cfg.input_noise else "dynamic"
    if cfg.input_with_middle_frame:
        dynamic_str += "_wmf"
    postfix = Path(
        f"./{dynamic_str}/{cfg.dynamic.frame_size}_{cfg.dynamic.ppf}_{cfg.dynamic.total_pixels}/{cfg.S}/{blind}/{cfg.N}/{noise_level}/"
    )
    print(postfix, cfg.dynamic.total_pixels)
    cfg.model_path = cfg.model_path / postfix
    cfg.optim_path = cfg.optim_path / postfix
    if not cfg.model_path.exists():
        cfg.model_path.mkdir(parents=True)
    if not cfg.optim_path.exists():
        cfg.optim_path.mkdir(parents=True)
    checkpoint = cfg.model_path / Path("checkpoint_{}.tar".format(cfg.epochs))
    print("N: {} | Noise Level: {}".format(cfg.N, cfg.noise_params['g']['stddev']))
    torch.cuda.set_device(gpuid)

    # -- load model / optim / scheduler --
    model, criterion = load_model_attn(cfg)
    optimizer = load_optimizer(cfg, model)
    scheduler = load_scheduler(cfg, model, optimizer)
    nparams = count_parameters(model)
    print("Number of Trainable Parameters: {}".format(nparams))

    # -- load data --
    data, loader = load_dataset(cfg, 'dynamic')

    if cfg.load:
        fp = cfg.model_path / Path("checkpoint_30.tar")
        model = load_model_fp(cfg, model, fp, 0)

    cfg.current_epoch = 0
    te_ave_psnr = {}
    test_before = False
    if test_before:
        ave_psnr = test_loop(cfg, model, criterion, loader.te, -1)
        print("PSNR before training {:2.3e}".format(ave_psnr))
        return
    if checkpoint.exists() and cfg.load:
        model = load_model_fp(cfg, model, checkpoint, gpuid)
        print("Loaded model.")
        cfg.current_epoch = cfg.epochs  # final checkpoint present: skip training

    for epoch in range(cfg.current_epoch, cfg.epochs):
        # NOTE(review): DIP fits per-image; `model` above is unused by this
        # loop — `dip_loop` only takes the loader. Confirm intent.
        ave_psnr = dip_loop(cfg, loader.tr, epoch)
        te_ave_psnr[epoch] = ave_psnr
        cfg.current_epoch += 1

    epochs, psnr = zip(*te_ave_psnr.items())
    best_index = np.argmax(psnr)
    best_epoch, best_psnr = epochs[best_index], psnr[best_index]
    print(
        f"Best Epoch {best_epoch} | Best PSNR {best_psnr} | N: {cfg.N} | Blind: {blind}"
    )
    root = Path(f"{settings.ROOT_PATH}/output/dip/{postfix}/")
    fn = Path(f"results.csv")
    if not root.exists():
        root.mkdir(parents=True)
    path = root / fn
    with open(path, 'w') as f:
        f.write("{:d},{:d},{:2.10e},{:d}\n".format(cfg.N, best_epoch,
                                                   best_psnr, nparams))
    save_model(cfg, model, optimizer)
def run_me(rank=0, Sgrid=[50000], Ngrid=[5], nNgrid=1, Ggrid=[25], nGgrid=1, ngpus=3, idx=0):
    """Evaluate (and optionally train) the n2n model.

    NOTE(review): as written, `cfg.load = True` and `test_before = True`
    mean this function loads the final checkpoint, runs one test pass, and
    returns — the training loop below is intentionally unreachable.
    """
    args = get_args()
    args.name = "default"
    cfg = get_cfg(args)
    cfg.use_ddp = False
    cfg.use_apex = False
    # gpuid = rank % ngpus # set gpuid
    gpuid = 2  # pinned to gpu 2
    cfg.gpuid = gpuid
    cfg.device = f"cuda:{gpuid}"

    # -- decode the flat grid index into per-dimension indices --
    grid_idx = idx * (1 * ngpus) + rank
    B_grid_idx = (grid_idx % 2)
    N_grid_idx = (grid_idx // 2) % nNgrid
    G_grid_idx = grid_idx // (nNgrid * 2) % nGgrid
    S_grid_idx = grid_idx // (nGgrid * nNgrid * 2)

    cfg.use_collate = True
    cfg.S = Sgrid[S_grid_idx]
    cfg.dataset.name = "voc"
    cfg.blind = (B_grid_idx == 0)
    cfg.N = 2
    cfg.dynamic.frames = cfg.N
    cfg.noise_type = 'g'
    cfg.noise_params['g']['stddev'] = Ggrid[G_grid_idx]
    noise_level = Ggrid[G_grid_idx]
    cfg.batch_size = 16
    cfg.init_lr = 1e-3
    cfg.unet_channels = 3
    # if cfg.blind: cfg.input_N = cfg.N - 1
    # else: cfg.input_N = cfg.N
    cfg.input_N = cfg.N - 1
    cfg.epochs = 30
    cfg.log_interval = int(int(50000 / cfg.batch_size) / 100)
    cfg.dynamic.bool = True
    cfg.dynamic.ppf = 0
    cfg.dynamic.frame_size = 256
    cfg.dynamic.total_pixels = 0
    cfg.load = True

    blind = "blind" if cfg.blind else "nonblind"
    print(grid_idx, blind, cfg.N, Ggrid[G_grid_idx], gpuid)

    postfix = get_postfix_str(cfg, blind, noise_level)
    cfg.model_path = cfg.model_path / postfix
    cfg.optim_path = cfg.optim_path / postfix
    if not cfg.model_path.exists():
        cfg.model_path.mkdir(parents=True)
    if not cfg.optim_path.exists():
        cfg.optim_path.mkdir(parents=True)
    checkpoint = cfg.model_path / Path("checkpoint_{}.tar".format(cfg.epochs))
    print("N: {} | Noise Level: {}".format(cfg.N, cfg.noise_params['g']['stddev']))
    torch.cuda.set_device(gpuid)

    # -- load model / optim / scheduler --
    model = load_model(cfg)
    optimizer = load_optimizer(cfg, model)
    scheduler = load_scheduler(cfg, model, optimizer)
    nparams = count_parameters(model)
    print("Number of Trainable Parameters: {}".format(nparams))

    # -- load data --
    data, loader = load_dataset(cfg, 'dynamic')

    # -- load criterion --
    criterion = nn.BCELoss()

    # -- optionally load model --
    if cfg.load:
        model = load_model_fp(cfg, model, checkpoint, gpuid)

    cfg.current_epoch = 0
    te_ave_psnr = {}
    test_before = True
    if test_before:
        ave_psnr = test_loop_n2n(cfg, model, criterion, loader.te, -1)
        print("PSNR before training {:2.3e}".format(ave_psnr))
        return
    # NOTE(review): duplicate load — the checkpoint was already loaded above
    # when cfg.load is True; kept for parity with the other run_me variants.
    if checkpoint.exists() and cfg.load:
        model = load_model_fp(cfg, model, checkpoint, gpuid)
        print("Loaded model.")
        cfg.current_epoch = cfg.epochs

    for epoch in range(cfg.current_epoch, cfg.epochs):
        losses = train_loop_n2n(cfg, model, optimizer, criterion, loader.tr,
                                epoch)
        ave_psnr = test_loop_n2n(cfg, model, criterion, loader.te, epoch)
        te_ave_psnr[epoch] = ave_psnr
        cfg.current_epoch += 1

    epochs, psnr = zip(*te_ave_psnr.items())
    best_index = np.argmax(psnr)
    best_epoch, best_psnr = epochs[best_index], psnr[best_index]
    root = Path(f"{settings.ROOT_PATH}/output/n2n/{postfix}/")
    fn = Path(f"results.csv")
    if not root.exists():
        root.mkdir(parents=True)
    path = root / fn
    with open(path, 'w') as f:
        f.write("{:d},{:d},{:2.10e},{:d}\n".format(cfg.N, best_epoch,
                                                   best_psnr, nparams))
    save_model(cfg, model, optimizer)
def run_me(rank=0, Sgrid=[1], Ngrid=[3], nNgrid=1, Ggrid=[25.], nGgrid=1, ngpus=3, idx=0):
    """Train the ABPS v1.0 burst model with tensorboard logging.

    Pulls all settings from `get_main_config()` (the grid parameters are
    unused here), builds experiment-name strings, sets up a SummaryWriter,
    optionally restores a checkpoint, then trains with periodic saves and
    test epochs, writing the best test PSNR to results.csv.
    """
    cfg = get_main_config()

    # -- noise info --
    noise_type = cfg.noise_params.ntype
    noise_params = cfg.noise_params['qis']
    noise_level = noise_params['readout']
    noise_level_str = f"{int(noise_params['alpha']),int(noise_params['readout']),int(noise_params['nbits'])}"

    # -- experiment info: build the exp-name from the config flags --
    name = "abps_v1p0"
    ds_name = cfg.dataset.name.lower()
    sup_str = "sup" if cfg.supervised else "unsup"
    bs_str = "b{}".format(cfg.batch_size)
    align_str = "yesAlignNet" if cfg.burst_use_alignment else "noAlignNet"
    unet_str = "yesUnet" if cfg.burst_use_unet else "noUnet"
    if cfg.burst_use_unet_only:
        unet_str += "Only"
    kpn_cascade_str = "cascade{}".format(
        cfg.kpn_cascade_num) if cfg.kpn_cascade else "noCascade"
    kpnba_str = "kpnBurstAlpha{}".format(int(cfg.kpn_burst_alpha * 1000))
    frame_str = "n{}".format(cfg.N)
    framesize_str = "f{}".format(cfg.dynamic.frame_size)
    filtersize_str = "filterSized{}".format(cfg.kpn_frame_size)
    misc = "noKL"
    cfg.exp_name = f"{sup_str}_{name}_{ds_name}_{kpn_cascade_str}_{bs_str}_{frame_str}_{framesize_str}_{filtersize_str}_{align_str}_{unet_str}_{kpnba_str}_{misc}"
    print(f"Experiment name: {cfg.exp_name}")
    desc_fmt = (frame_str, kpn_cascade_str, framesize_str, filtersize_str,
                cfg.init_lr, align_str)
    cfg.desc = "Desc: unsup, frames {}, cascade {}, framesize {}, filter size {}, lr {}, {}, kl loss, anneal mse".format(*desc_fmt)
    print(f"Description: [{cfg.desc}]")
    # NOTE(review): this overrides the qis readout level read above.
    noise_level = cfg.noise_params['g']['stddev']

    # -- attn params --
    cfg.patch_sizes = [128, 128]
    cfg.d_model_attn = 3

    cfg.input_noise = False
    cfg.input_noise_middle_only = False
    cfg.input_with_middle_frame = True
    cfg.middle_frame_random_erase = False
    cfg.input_noise_level = noise_level / 255.
    if (cfg.blind == 0):  # e.g. supervised is true
        cfg.input_with_middle_frame = True
    if cfg.input_with_middle_frame:
        cfg.input_N = cfg.N

    blind = "blind" if cfg.blind else "nonblind"
    gpuid = cfg.gpuid
    print(blind, cfg.N, noise_level, gpuid)

    # -- output/model paths encode the full experiment configuration --
    dynamic_str = "dynamic_input_noise" if cfg.input_noise else "dynamic"
    if cfg.input_noise_middle_only:
        dynamic_str += "_mo"
    if cfg.input_with_middle_frame:
        dynamic_str += "_wmf"
    postfix = Path(
        f"./modelBurst/{cfg.exp_name}/{dynamic_str}/{cfg.dynamic.frame_size}_{cfg.dynamic.ppf}_{cfg.dynamic.total_pixels}/{cfg.S}/{blind}/{cfg.N}/{noise_level}/"
    )
    print(postfix, cfg.dynamic.total_pixels)
    cfg.model_path = cfg.model_path / postfix
    cfg.optim_path = cfg.optim_path / postfix
    if not cfg.model_path.exists():
        cfg.model_path.mkdir(parents=True)
    if not cfg.optim_path.exists():
        cfg.optim_path.mkdir(parents=True)
    checkpoint = cfg.model_path / Path("checkpoint_{}.tar".format(cfg.epochs))
    print(
        f"Sim Method: {cfg.sim_method} | Shuffle K {cfg.sim_shuffleK} | Sim K: {cfg.sim_K} | Patchsize: {cfg.sim_patchsize}"
    )
    print("N: {} | Noise Level: {} | Noise Type: {}".format(
        cfg.N, noise_level_str, noise_type))
    torch.cuda.set_device(gpuid)

    # -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
    #          init summary writer
    # -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
    log_base = Path(f"runs/{name}")
    if not log_base.exists():
        log_base.mkdir(parents=True)
    log_dir = log_base / Path(f"{cfg.exp_name}")
    writer = SummaryWriter(log_dir=str(log_dir))

    # -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
    #    Load the Model, Data, Optim, Crit
    # -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
    model = load_model(cfg)
    optimizer = load_optimizer(cfg, model)
    nparams = count_parameters(model)
    print("Number of Trainable Parameters: {}".format(nparams))
    print("GPUID: {}".format(gpuid))
    print("PID: {}".format(os.getpid()))

    # -- load data --
    data, loader = load_dataset(cfg, 'dynamic')

    # -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
    #    Load the Model from Memory
    # -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
    if cfg.load:
        name = "denoiser"
        # NOTE(review): hard-coded absolute checkpoint path.
        fp = "/home/gauenk/Documents/experiments/cl_gen/output/abps/cifar10/default/model/modelBurst/unsup_abps_burstv2_voc_noCascade_b20_n8_f128_filterSized9_noAlignNet_noUnet_unet_mse_noKL/dynamic_wmf/128_1_8/1/blind/8/25.0/denoiser/checkpoint_83.tar"
        model.denoiser_info.model = load_model_fp(cfg,
                                                  model.denoiser_info.model,
                                                  fp, cfg.gpuid)
        # optim restore currently disabled; path kept for reference.
        fp = "/home/gauenk/Documents/experiments/cl_gen/output/abps/cifar10/default/optim/modelBurst/unsup_abps_burstv2_voc_noCascade_b20_n8_f128_filterSized9_noAlignNet_noUnet_unet_mse_noKL/dynamic_wmf/128_1_8/1/blind/8/25.0/denoiser/checkpoint_83.tar"
        if cfg.restart_after_load:
            cfg.current_epoch = 0
            cfg.global_step = 0
        else:
            cfg.current_epoch = cfg.load_epoch + 1
            cfg.global_step = cfg.load_epoch * len(data.tr)
        ce, gs = cfg.current_epoch, cfg.global_step
        print(f"Starting Training from epoch [{ce}] and global step [{gs}]")
    else:
        cfg.current_epoch = 0
        # FIX: cfg.global_step is used by make_lr_scheduler below but was
        # never set on this path; default to 0 unless the config provides one.
        cfg.global_step = getattr(cfg, "global_step", 0)

    scheduler = make_lr_scheduler(cfg, optimizer, cfg.global_step)

    # -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
    #       Pre train-loop setup
    # -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
    te_ave_psnr = {}
    test_before = False
    if test_before:
        ave_psnr, record_test = test_loop(cfg, model, data.te, loader.te, -1,
                                          writer)
        print("PSNR before training {:2.3e}".format(ave_psnr))
        return
    if checkpoint.exists() and cfg.load:
        model = load_model_fp(cfg, model, checkpoint, cfg.gpuid)
        print("Loaded model.")
        cfg.current_epoch = cfg.epochs + 1
        # FIX: was `len(train_data)` (undefined); use the loaded train set.
        cfg.global_step = len(data.tr) * cfg.epochs

    record_losses = pd.DataFrame({
        'kpn': [],
        'ot': [],
        'psnr': [],
        'psnr_std': []
    })
    use_record = False
    loss_type = "sup_r_ot"

    # -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
    #           Training Loop
    # -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
    def check_test_epoch(epoch):
        # test every 20 epochs, skipping epoch 0
        return (epoch % 20) == 0 and (epoch > 0)

    for epoch in range(cfg.current_epoch, cfg.epochs):
        lr = optimizer.param_groups[0]["lr"]
        print(cfg.desc)
        print("Learning Rate: %2.2e" % (lr))
        sys.stdout.flush()
        losses, record_losses = train_loop(cfg, model, optimizer, scheduler,
                                           loader.tr, epoch, record_losses,
                                           writer)
        if use_record:
            write_record_losses_file(cfg.current_epoch, postfix, loss_type,
                                     record_losses)
        cfg.current_epoch += 1
        if epoch % cfg.save_interval == 0 and epoch > 0:
            save_burst_model(cfg, "model", model, optimizer)
        if check_test_epoch(epoch):
            ave_psnr, record_test = test_loop(cfg, model, data.te, loader.te,
                                              epoch)
            if use_record:
                write_record_test_file(cfg.current_epoch, postfix, loss_type,
                                       record_test, writer)
            te_ave_psnr[epoch] = ave_psnr

    # FIX: guard against no test epoch having run (epochs < 20), where
    # zip(*{}.items()) would raise a ValueError.
    if te_ave_psnr:
        epochs, psnr = zip(*te_ave_psnr.items())
        best_index = np.argmax(psnr)
        best_epoch, best_psnr = epochs[best_index], psnr[best_index]
        print(
            f"Best Epoch {best_epoch} | Best PSNR {best_psnr} | N: {cfg.N} | Blind: {blind}"
        )
        root = Path(f"{settings.ROOT_PATH}/output/abps/{postfix}/")
        fn = Path(f"results.csv")
        if not root.exists():
            root.mkdir(parents=True)
        path = root / fn
        with open(path, 'w') as f:
            f.write("{:d},{:d},{:2.10e},{:d}\n".format(
                cfg.N, best_epoch, best_psnr, nparams))
    else:
        print("No test epochs ran; skipping results summary.")
    save_model(cfg, model, optimizer)
def run_me(rank=0, Sgrid=[50000], Ngrid=[3], nNgrid=1, Ggrid=[25.], nGgrid=1, ngpus=3, idx=0):
    """Train the supervised STN model on dynamic-noise bursts.

    Reference points (AWGN stddev vs. PSNR):
        PSNR 20 = (can equal) = AWGN @ 25
        PSNR 25 = (can equal) = AWGN @ 14
        PSNR 28 = (can equal) = AWGN @ 5
    """
    args = get_args()
    args.name = "default"
    cfg = get_cfg(args)
    cfg.use_ddp = False
    cfg.use_apex = False
    gpuid = 0  # pinned to gpu 0 (rank-based assignment disabled)
    cfg.gpuid = gpuid
    cfg.device = f"cuda:{gpuid}"

    # -- experiment info --
    cfg.exp_name = "sup_stn_standard"
    cfg.desc = "Desc: sup stn model standard"

    # -- decode the flat grid index into per-dimension indices --
    grid_idx = idx * (1 * ngpus) + rank
    B_grid_idx = (grid_idx % 2)
    N_grid_idx = (grid_idx // 2) % nNgrid
    G_grid_idx = grid_idx // (nNgrid * 2) % nGgrid
    S_grid_idx = grid_idx // (nGgrid * nNgrid * 2)

    cfg.use_collate = True
    cfg.S = Sgrid[S_grid_idx]
    cfg.dataset.name = "voc"
    cfg.blind = False  # supervised: grid value ignored
    cfg.N = Ngrid[N_grid_idx]
    cfg.N = 3  # 10
    cfg.dynamic.frames = cfg.N
    cfg.noise_type = 'g'
    cfg.noise_params['g']['stddev'] = Ggrid[G_grid_idx]
    noise_level = Ggrid[G_grid_idx]
    cfg.batch_size = 4
    cfg.init_lr = 1e-4
    cfg.unet_channels = 3
    cfg.input_N = cfg.N - 1
    cfg.epochs = 100
    cfg.log_interval = 50  # int(int(50000 / cfg.batch_size) / 100)
    cfg.dynamic.bool = True
    cfg.dynamic.ppf = 2
    cfg.dynamic.frame_size = 256
    cfg.dynamic.total_pixels = 2 * cfg.N
    cfg.load = False

    # -- input noise for learning --
    cfg.input_noise = False
    cfg.input_noise_middle_only = False
    cfg.input_with_middle_frame = True
    cfg.input_noise_level = noise_level / 255
    if cfg.input_with_middle_frame:
        cfg.input_N = cfg.N

    blind = "blind" if cfg.blind else "nonblind"
    print(grid_idx, blind, cfg.N, Ggrid[G_grid_idx], gpuid, cfg.input_noise,
          cfg.input_with_middle_frame)

    # -- output paths encode the experiment settings --
    dynamic_str = "dynamic_input_noise" if cfg.input_noise else "dynamic"
    if cfg.input_noise_middle_only:
        dynamic_str += "_mo"
    if cfg.input_with_middle_frame:
        dynamic_str += "_wmf"
    postfix = Path(
        f"./{dynamic_str}/{cfg.dynamic.frame_size}_{cfg.dynamic.ppf}_{cfg.dynamic.total_pixels}/{cfg.S}/{blind}/{cfg.N}/{noise_level}/"
    )
    print(postfix)
    cfg.model_path = cfg.model_path / postfix
    cfg.optim_path = cfg.optim_path / postfix
    if not cfg.model_path.exists():
        cfg.model_path.mkdir(parents=True)
    if not cfg.optim_path.exists():
        cfg.optim_path.mkdir(parents=True)
    checkpoint = cfg.model_path / Path("checkpoint_{}.tar".format(cfg.epochs))
    print("PID: {}".format(os.getpid()))
    print("N: {} | Noise Level: {}".format(cfg.N,
                                           cfg.noise_params['g']['stddev']))
    torch.cuda.set_device(gpuid)

    # -- load model / optim / scheduler --
    model, criterion = load_model_stn(cfg)
    optimizer = load_optimizer(cfg, model)
    scheduler = load_scheduler(cfg, model, optimizer)
    nparams = count_parameters(model)
    print("Number of Trainable Parameters: {}".format(nparams))

    # -- load data --
    data, loader = load_dataset(cfg, 'dynamic')

    if cfg.load:
        fp = cfg.model_path / Path("checkpoint_30.tar")
        model = load_model_fp(cfg, model, fp, 0)

    cfg.current_epoch = 0
    te_ave_psnr = {}
    test_before = False
    if test_before:
        ave_psnr = test_loop(cfg, model, criterion, loader.te, -1)
        print("PSNR before training {:2.3e}".format(ave_psnr))
        return
    if checkpoint.exists() and cfg.load:
        model = load_model_fp(cfg, model, checkpoint, gpuid)
        print("Loaded model.")
        cfg.current_epoch = cfg.epochs  # final checkpoint present: skip training

    cfg.global_step = 0
    use_record = False
    record = init_record()
    for epoch in range(cfg.current_epoch, cfg.epochs):
        print(cfg.desc)
        sys.stdout.flush()
        losses, epoch_record = train_loop(cfg, model, optimizer, criterion,
                                          loader.tr, epoch)
        if use_record:
            record = record.append(epoch_record)
            write_record_file(cfg.current_epoch, postfix, record)
        ave_psnr = test_loop(cfg, model, criterion, loader.te, epoch)
        te_ave_psnr[epoch] = ave_psnr
        cfg.current_epoch += 1

    epochs, psnr = zip(*te_ave_psnr.items())
    best_index = np.argmax(psnr)
    best_epoch, best_psnr = epochs[best_index], psnr[best_index]
    root = Path(f"{settings.ROOT_PATH}/output/n2n-stn/{postfix}/")
    fn = Path(f"results.csv")
    if not root.exists():
        root.mkdir(parents=True)
    path = root / fn
    with open(path, 'w') as f:
        f.write("{:d},{:d},{:2.10e},{:d}\n".format(cfg.N, best_epoch,
                                                   best_psnr, nparams))
    save_model(cfg, model, optimizer)
def run_me(rank=0, Sgrid=[1], Ngrid=[3], nNgrid=1, Ggrid=[25.], nGgrid=1, ngpus=3, idx=0):
    """Train the unsupervised BYOL burst-KPN model (online/target pair).

    Builds the config from the grid indices, constructs two copies of the
    burst-KPN model (online + target) each with its own optimizer, trains
    them jointly via `train_loop_burstKPN`, and writes the best test PSNR
    to results.csv.
    """
    # -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
    #     algorithm hyperparameters
    # -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
    args = get_args()
    args.name = "default"
    cfg = get_cfg(args)
    cfg.use_ddp = False
    cfg.use_apex = False
    gpuid = rank % ngpus  # set gpuid from rank ...
    gpuid = 2             # ... but pinned to gpu 2 here
    cfg.gpuid = gpuid
    cfg.device = f"cuda:{gpuid}"

    # -- decode the flat grid index into per-dimension indices --
    grid_idx = idx * (1 * ngpus) + rank
    B_grid_idx = (grid_idx % 2)
    N_grid_idx = (grid_idx // 2) % nNgrid
    G_grid_idx = grid_idx // (nNgrid * 2) % nGgrid
    S_grid_idx = grid_idx // (nGgrid * nNgrid * 2)

    # -- experiment info --
    cfg.exp_name = "unsup_byol_burst"
    cfg.desc = "Desc: unsup byol burst"

    # -- force blind --
    B_grid_idx = 0

    # -- config settings --
    cfg.use_collate = True
    cfg.S = Sgrid[S_grid_idx]
    cfg.dataset.name = "voc"
    cfg.supervised = False
    cfg.blind = (B_grid_idx == 0)
    # FIX: was `~cfg.supervised` — bitwise invert of a bool (-1 or -2) is
    # always truthy, so blind would be "True" even when supervised.
    cfg.blind = not cfg.supervised
    cfg.N = Ngrid[N_grid_idx]
    cfg.N = 3  # override: fixed burst size
    cfg.dynamic.frames = cfg.N
    cfg.noise_type = 'g'
    cfg.noise_params['g']['stddev'] = Ggrid[G_grid_idx]
    noise_level = Ggrid[G_grid_idx]
    cfg.batch_size = 4
    cfg.init_lr = 1e-4
    cfg.unet_channels = 3
    cfg.input_N = cfg.N - 1
    cfg.epochs = 100
    cfg.color_cat = True
    cfg.log_interval = 100  # int(int(50000 / cfg.batch_size) / 500)
    cfg.dynamic.bool = True
    cfg.dynamic.ppf = 2
    cfg.dynamic.random_eraser = False
    cfg.dynamic.frame_size = 64
    # cfg.dynamic.total_pixels = cfg.dynamic.ppf * cfg.N
    cfg.dynamic.total_pixels = 2 * cfg.N
    cfg.load = False

    # -- attn params --
    cfg.patch_sizes = [128, 128]
    cfg.d_model_attn = 3

    cfg.input_noise = False
    cfg.input_noise_middle_only = False
    cfg.input_with_middle_frame = True
    cfg.middle_frame_random_erase = False
    cfg.input_noise_level = noise_level / 255.
    if (cfg.blind == 0):  # e.g. supervised is true
        cfg.input_with_middle_frame = True
    if cfg.input_with_middle_frame:
        cfg.input_N = cfg.N

    # -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
    #     create strings from settings
    # -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
    blind = "blind" if cfg.blind else "nonblind"
    print(grid_idx, blind, cfg.N, Ggrid[G_grid_idx], gpuid)

    dynamic_str = "dynamic_input_noise" if cfg.input_noise else "dynamic"
    if cfg.input_noise_middle_only:
        dynamic_str += "_mo"
    if cfg.input_with_middle_frame:
        dynamic_str += "_wmf"
    postfix = Path(
        f"./modelBurst/{dynamic_str}/{cfg.dynamic.frame_size}_{cfg.dynamic.ppf}_{cfg.dynamic.total_pixels}/{cfg.S}/{blind}/{cfg.N}/{noise_level}/"
    )
    print(postfix, cfg.dynamic.total_pixels)
    cfg.model_path = cfg.model_path / postfix
    cfg.optim_path = cfg.optim_path / postfix
    if not cfg.model_path.exists():
        cfg.model_path.mkdir(parents=True)
    if not cfg.optim_path.exists():
        cfg.optim_path.mkdir(parents=True)
    checkpoint = cfg.model_path / Path("checkpoint_{}.tar".format(cfg.epochs))
    print("N: {} | Noise Level: {}".format(cfg.N,
                                           cfg.noise_params['g']['stddev']))
    torch.cuda.set_device(gpuid)

    # -=-=-=-=-=-=-=-=-=-
    #     load models
    # -=-=-=-=-=-=-=-=-=-
    model_online, criterion = load_burst_kpn_model(cfg)
    optim_online = load_optimizer(cfg, model_online)
    model_target, _ = load_burst_kpn_model(cfg)
    optim_target = load_optimizer(cfg, model_target)
    nparams = count_parameters(model_online)
    print("Number of Trainable Parameters: {}".format(nparams))
    print("PID: {}".format(os.getpid()))

    # -=-=-=-=-=-=-=-=-=-
    #      load data
    # -=-=-=-=-=-=-=-=-=-
    data, loader = load_dataset(cfg, 'dynamic')

    # -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
    #    restore model from file
    # -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
    if cfg.load:
        fp = cfg.model_path / Path("checkpoint_30.tar")
        # FIX: was `model = load_model_fp(cfg, model, fp, 0)` — `model` is
        # undefined here; the online network is the one being restored.
        model_online = load_model_fp(cfg, model_online, fp, 0)

    cfg.current_epoch = 0
    te_ave_psnr = {}
    test_before = False
    if test_before:
        # FIX: was `model` (undefined) — evaluate the online network.
        ave_psnr = test_loop_burstKPN(cfg, model_online, criterion, loader.te,
                                      -1)
        print("PSNR before training {:2.3e}".format(ave_psnr))
        return
    if checkpoint.exists() and cfg.load:
        # FIX: was `model` (undefined) — restore the online network.
        model_online = load_model_fp(cfg, model_online, checkpoint, gpuid)
        print("Loaded model.")
        cfg.current_epoch = cfg.epochs

    # -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
    #    train & test over epochs
    # -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
    record_losses = pd.DataFrame({
        'kpn': [],
        'ot': [],
        'psnr': [],
        'psnr_std': []
    })
    use_record = False
    loss_type = "sup_r_ot"
    for epoch in range(cfg.current_epoch, cfg.epochs):
        print(cfg.desc)
        sys.stdout.flush()
        losses, record_losses = train_loop_burstKPN(cfg, model_target,
                                                    model_online, optim_target,
                                                    optim_online, criterion,
                                                    loader.tr, epoch,
                                                    record_losses)
        if use_record:
            write_record_losses_file(cfg.current_epoch, postfix, loss_type,
                                     record_losses)
        ave_psnr, record_test = test_loop_burstKPN(cfg, model_online,
                                                   criterion, loader.te, epoch)
        if use_record:
            write_record_test_file(cfg.current_epoch, postfix, loss_type,
                                   record_test)
        te_ave_psnr[epoch] = ave_psnr
        cfg.current_epoch += 1

    epochs, psnr = zip(*te_ave_psnr.items())
    best_index = np.argmax(psnr)
    best_epoch, best_psnr = epochs[best_index], psnr[best_index]
    print(
        f"Best Epoch {best_epoch} | Best PSNR {best_psnr} | N: {cfg.N} | Blind: {blind}"
    )
    root = Path(f"{settings.ROOT_PATH}/output/byol/{postfix}/")
    fn = Path(f"results.csv")
    if not root.exists():
        root.mkdir(parents=True)
    path = root / fn
    with open(path, 'w') as f:
        f.write("{:d},{:d},{:2.10e},{:d}\n".format(cfg.N, best_epoch,
                                                   best_psnr, nparams))
    # FIX: was `save_model(cfg, model, optimizer)` — both names undefined in
    # this function; persist the online network and its optimizer.
    save_model(cfg, model_online, optim_online)