# Uses module-level globals: net, train_data, train_dataset, valid_dataset,
# epochs, learning_rate, save_dir.
def run():
    score_max = -1.0
    best_epoch = 0
    weight = torch.from_numpy(train_data.weight).float()  # weight for each class
    weight = to_var(weight)
    optimizer = optim.Adam(params=net.parameters(), lr=learning_rate, betas=(0.9, 0.999))
    criterion = nn.CrossEntropyLoss(weight=weight)

    for epoch in range(1, epochs + 1):
        print('epoch....................................' + str(epoch))
        train_loss = []

        # *************** train model ***************
        print('train ....')
        net.train()
        for step, (image, label, index) in enumerate(train_dataset):
            image = to_var(image)  # 4D tensor: bz * 4 (modal) * 240 * 240
            label = to_var(label)  # 3D tensor: bz * 240 * 240 (values 0-4)

            optimizer.zero_grad()
            predicts = net(image)  # 4D tensor: bz * 5 (class) * 240 * 240
            loss_train = criterion(predicts, label.long())
            train_loss.append(float(loss_train))
            loss_train.backward()
            optimizer.step()

            # ****** save sample images for each epoch ******
            if step % 200 == 0:
                print('..step ....%d' % step)
                print('....loss....%f' % loss_train)
                predicts = one_hot_reverse(predicts)  # 3D long tensor: bz * 240 * 240 (values 0-4)
                save_train_images(image, predicts, label, index, epoch, save_dir=save_dir)

        # ***************** calculate valid loss *****************
        print('valid ....')
        current_score, valid_loss = evaluation(net, valid_dataset, criterion, save_dir=None)

        # **************** report mean loss for the epoch ****************
        print('train_epoch_loss ' + str(sum(train_loss) / (len(train_loss) * 1.0)))
        print('valid_epoch_loss ' + str(sum(valid_loss) / (len(valid_loss) * 1.0)))

        # **************** save model ****************
        if current_score > score_max:
            best_epoch = epoch
            torch.save(net.state_dict(), os.path.join(save_dir, 'best_epoch.pth'))
            score_max = current_score
        print('valid_meanIoU_max ' + str(score_max))
        print('Current best epoch is %d' % best_epoch)

        if epoch == epochs:
            torch.save(net.state_dict(), os.path.join(save_dir, 'final_epoch.pth'))
            print('Best epoch is %d' % best_epoch)

    print('done!')
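# `to_var` and `one_hot_reverse` are defined elsewhere in the original script.
# A minimal sketch of what `to_var` presumably looks like, assuming the usual
# Variable-wrapping helper from pre-0.4 PyTorch code (hypothetical, for
# illustration only):
import torch
from torch.autograd import Variable

def to_var(x):
    # Move to the GPU when one is available, then wrap for autograd
    # (Variable is a no-op on PyTorch >= 0.4, where it merged with Tensor).
    if torch.cuda.is_available():
        x = x.cuda()
    return Variable(x)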
def Training():
    print('Training')

    ####################################################################
    ## Hyper parameter
    ####################################################################
    print('Initializing hyper parameter')

    vis = visdom.Visdom()
    loss_window = vis.line(X=torch.zeros((1,)).cpu(),
                           Y=torch.zeros((1,)).cpu(),
                           opts=dict(xlabel='epoch',
                                     ylabel='Loss',
                                     title='Training Loss',
                                     legend=['Loss']))

    #########################################################################
    ## Get dataset
    #########################################################################
    print("Get dataset")
    loader = Generator()

    ##############################
    ## Get agent and model
    ##############################
    print('Get agent')
    if p.model_path == "":
        lane_assistant = Assistant.Assistant()
    else:
        lane_assistant = Assistant.Assistant()
        lane_assistant.load_weights(1912, "tensor(0.9420)")

    ##############################
    ## Check GPU
    ##############################
    print('Setup GPU mode')
    if torch.cuda.is_available():
        lane_assistant.cuda()
        # torch.backends.cudnn.benchmark = True

    ##############################
    ## Loop for training
    ##############################
    print('Training loop')
    step = 0
    sampling_list = None
    for epoch in range(p.n_epoch):
        lane_assistant.training_mode()
        for inputs, target_lanes, target_h, test_image, data_list in loader.Generate(sampling_list):
            # training
            # util.visualize_points(inputs[0], target_lanes[0], target_h[0])
            print("epoch : " + str(epoch))
            print("step : " + str(step))
            loss_p = lane_assistant.train(inputs, target_lanes, target_h, epoch,
                                          lane_assistant, data_list)
            torch.cuda.synchronize()
            loss_p = loss_p.cpu().data

            if step % 50 == 0:
                vis.line(X=torch.ones((1, 1)).cpu() * int(step / 50),
                         Y=torch.Tensor([loss_p]).unsqueeze(0).cpu(),
                         win=loss_window,
                         update='append')
            if step % 100 == 0:
                lane_assistant.save_model(int(step / 100), loss_p)
                testing(lane_assistant, test_image, step, loss_p)
            step += 1

        sampling_list = copy.deepcopy(lane_assistant.get_data_list())
        lane_assistant.sample_reset()

        # evaluation
        if epoch >= 0 and epoch % 1 == 0:
            print("evaluation")
            lane_assistant.evaluate_mode()
            th_list = [0.8]
            index = [3]
            lane_assistant.save_model(int(step / 100), loss_p)

            for idx in index:
                print("generate result")
                test.evaluation(loader, lane_assistant, index=idx,
                                name="test_result_" + str(epoch) + "_" + str(idx) + ".json")

            for idx in index:
                print("compute score")
                with open("/home/kym/Dropbox/eval_result2_" + str(idx) + "_.txt", 'a') as make_file:
                    make_file.write("epoch : " + str(epoch) + " loss : " + str(loss_p.cpu().data))
                    make_file.write(
                        evaluation.LaneEval.bench_one_submit(
                            "test_result_" + str(epoch) + "_" + str(idx) + ".json",
                            "test_label.json"))
                    make_file.write("\n")
                with open("eval_result_" + str(idx) + "_.txt", 'a') as make_file:
                    make_file.write("epoch : " + str(epoch) + " loss : " + str(loss_p.cpu().data))
                    make_file.write(
                        evaluation.LaneEval.bench_one_submit(
                            "test_result_" + str(epoch) + "_" + str(idx) + ".json",
                            "test_label.json"))
                    make_file.write("\n")

        if int(step) > 700000:
            break
## MODEL TRAINING ##
# Uses the functions defined in train.py.
# For the CNN, x_train must be reshaped to (*, 28, 28, 1).
x_train = np.reshape(x_train.values, (-1, 28, 28, 1))
# print(x_train.shape)

entrainement(x_train, y_train, model1, 'categorical_crossentropy', 'adam', ['accuracy'])

'''
Training can be monitored in TensorBoard. Run the command
"tensorboard --logdir trainings" (the training logs are written to the
"trainings" directory), then open the URL printed in the terminal
(for me: http://localhost:6006/).
'''

## EVALUATION ##
# Uses the functions defined in test.py.
p_train, train_acc, train_cmat = evaluation(x_train, y_train, model1)
# print("Confusion train\n", train_cmat)
# print("Acc train\n", train_acc)

## TESTING THE MODEL ON NEW DATA ##
x_test = importation("data/test.csv")
x_test = keras.utils.normalize(x_test)
# keras.utils.normalize returns an ndarray, so no .values here
x_test = np.reshape(x_test, (-1, 28, 28, 1))
p_test = model1.predict(x_test)
submission('submission_cnn.csv', p_test)
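# `entrainement` comes from train.py, which is not shown here. A minimal
# sketch of what such a helper could look like, assuming a Keras model and a
# TensorBoard callback writing to the "trainings" directory mentioned above
# (the signature matches the call site; the epoch count and validation split
# are illustrative):
import time
import keras

def entrainement(x, y, model, loss, optimizer, metrics, epochs=10):
    model.compile(loss=loss, optimizer=optimizer, metrics=metrics)
    tb = keras.callbacks.TensorBoard(log_dir='trainings/{}'.format(int(time.time())))
    model.fit(x, y, epochs=epochs, validation_split=0.1, callbacks=[tb])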
def Training():
    print('Training')

    ####################################################################
    ## Hyper parameter
    ####################################################################
    print('Initializing hyper parameter')

    vis = visdom.Visdom()
    loss_window = vis.line(X=torch.zeros((1,)).cpu(),
                           Y=torch.zeros((1,)).cpu(),
                           opts=dict(xlabel='epoch',
                                     ylabel='Loss',
                                     title='Training Loss',
                                     legend=['Loss']))

    #########################################################################
    ## Get dataset
    #########################################################################
    print("Get dataset")
    loader = Generator()

    ##############################
    ## Get agent and model
    ##############################
    print('Get agent')
    if p.model_path == "":
        lane_agent = agent.Agent()
    else:
        lane_agent = agent.Agent()
        lane_agent.load_weights(4235, "tensor(0.2127)")

    ##############################
    ## Check GPU
    ##############################
    print('Setup GPU mode')
    if torch.cuda.is_available():
        lane_agent.cuda()

    ##############################
    ## Loop for training
    ##############################
    print('Training loop')
    step = 0
    for epoch in range(p.n_epoch):
        lane_agent.training_mode()
        for inputs, target_lanes, target_h, test_image in loader.Generate():
            # training
            print("epoch : " + str(epoch))
            print("step : " + str(step))
            loss_p = lane_agent.train(inputs, target_lanes, target_h, epoch, lane_agent)
            loss_p = loss_p.cpu().data

            if step % 50 == 0:
                vis.line(X=torch.ones((1, 1)).cpu() * int(step / 50),
                         Y=torch.Tensor([loss_p]).unsqueeze(0).cpu(),
                         win=loss_window,
                         update='append')
            if step % 100 == 0:
                lane_agent.save_model(int(step / 100), loss_p)
                testing(lane_agent, test_image, step, loss_p)
            step += 1

        # evaluation
        if epoch > 0 and epoch % 10 == 0:
            print("evaluation")
            lane_agent.evaluate_mode()
            th_list = [0.3, 0.5, 0.7]
            lane_agent.save_model(int(step / 100), loss_p)

            for th in th_list:
                print("generate result")
                print(th)
                test.evaluation(loader, lane_agent, thresh=th,
                                name="test_result_" + str(epoch) + "_" + str(th) + ".json")

            for th in th_list:
                print("compute score")
                print(th)
                with open("eval_result_" + str(th) + "_.txt", 'a') as make_file:
                    make_file.write("epoch : " + str(epoch) + " loss : " + str(loss_p.cpu().data))
                    make_file.write(
                        evaluation.LaneEval.bench_one_submit(
                            "test_result_" + str(epoch) + "_" + str(th) + ".json",
                            "test_label.json"))
                    make_file.write("\n")

        if int(step) > 700000:
            break
def run_exp(config):
    warnings.filterwarnings('ignore')
    logger = config.get_logger('train')
    leaderboard_path = config._args.leaderboard
    Path(leaderboard_path).parent.mkdir(exist_ok=True, parents=True)
    with open(leaderboard_path, 'a') as f:
        txt_path = f"{config._log_dir}/preds.txt"
        print(txt_path, file=f, flush=True)

    expert_dims, raw_input_dims = compute_dims(config, logger)
    trn_config = compute_trn_config(config)

    if config._args.group_seed:
        seeds = [int(config._args.group_seed)]
    else:
        seeds = [int(x) for x in config._args.seeds.split(",")]

    # set up local filesystem on the cluster
    if socket.gethostname().endswith("cluster"):
        os.system(str(Path.home() / "configure_tmp_data.sh"))

    for ii, seed in enumerate(seeds):
        tic = time.time()
        logger.info(f"{ii + 1}/{len(seeds)} Setting experiment random seed to {seed}")
        set_seeds(seed)
        config["seed"] = seed

        # We use cls defaults for backwards compatibility with the MMIT configs. In the
        # long run this should be handled by the json configs themselves
        cls_defaults = ["train", "val", "tiny", "challenge"]

        model = config.init(
            name='arch',
            module=module_arch,
            expert_dims=expert_dims,
            text_dim=config["experts"]["text_dim"],
            disable_nan_checks=config["disable_nan_checks"],
            spatial_feats=config["data_loader"]["args"].get("spatial_feats", False),
            task=config.get("task", "retrieval"),
            ce_shared_dim=config["experts"].get("ce_shared_dim", None),
            feat_aggregation=config["data_loader"]["args"]["feat_aggregation"],
            trn_config=trn_config,
            trn_cat=config["data_loader"]["args"].get("trn_cat", 0),
        )
        logger.info(model)

        data_loaders = config.init(
            name='data_loader',
            module=module_data,
            logger=logger,
            raw_input_dims=raw_input_dims,
            text_feat=config["experts"]["text_feat"],
            text_dim=config["experts"]["text_dim"],
            text_agg=config["experts"]["text_agg"],
            use_zeros_for_missing=config["experts"].get("use_zeros_for_missing", False),
            task=config.get("task", "retrieval"),
            cls_partitions=config.get("cls_partitions", cls_defaults),
        )

        if config.get("manual_linear_init", False):
            logger.info("manually setting init for linear layers")

            def init_weights(m):
                if isinstance(m, nn.Linear):
                    torch.nn.init.xavier_uniform_(m.weight)
                    m.bias.data.fill_(0.01)

            model.apply(init_weights)

        loss = config.init(name="loss", module=module_loss)
        metrics = [getattr(module_metric, met) for met in config['metrics']]
        trainable_params = filter(lambda p: p.requires_grad, model.parameters())

        if config["optimizer"]["type"] == "RAdam":
            optimizer = config.init('optimizer', radam, trainable_params)
        elif config["optimizer"]["type"] == "Ranger":
            optimizer = config.init('optimizer', ranger, trainable_params)
        elif config["optimizer"]["type"] == "SWATS":
            optimizer = config.init('optimizer', swats, trainable_params)
        else:
            optimizer = config.init('optimizer', torch.optim, trainable_params)

        if config["lr_scheduler"]["type"] == "StepLR":
            lr_scheduler = config.init('lr_scheduler', torch.optim.lr_scheduler, optimizer)
        else:
            lr_scheduler = config.init('lr_scheduler', cos_restart, optimizer)

        visualizer = config.init(
            name='visualizer',
            module=module_vis,
            exp_name=config._exper_name,
            web_dir=config._web_log_dir,
        )

        trainer = Trainer(
            model,
            loss,
            metrics,
            optimizer,
            config=config,
            data_loaders=data_loaders,
            lr_scheduler=lr_scheduler,
            mini_train=config._args.mini_train,
            disable_nan_checks=config["disable_nan_checks"],
            visualizer=visualizer,
            val_freq=config["trainer"].get("val_freq", 1),
            force_cpu_val=config.get("force_cpu_val", False),
            skip_first_n_saves=config["trainer"].get("skip_first_n_saves", 0),
            include_optim_in_ckpts=config["trainer"].get("include_optim_in_ckpts", 1),
            cache_targets=set(config.get("cache_targets", [])),
        )
        trainer.train()
        best_ckpt_path = config.save_dir / "trained_model.pth"
        duration = time.strftime('%Hh%Mm%Ss', time.gmtime(time.time() - tic))
        logger.info(f"Training took {duration}")

        if config._config.get("eval_settings", False):
            eval_config = copy.deepcopy(config)
            merge(eval_config._config, config["eval_settings"], strategy=Strategy.REPLACE)
            eval_config._args.resume = best_ckpt_path
            evaluation(eval_config, logger=logger, trainer=trainer)

    # If multiple runs were conducted, report relevant statistics
    if len(seeds) > 1:
        log_summary(
            logger=logger,
            log_path=config.log_path,
            eval_mode=config["eval_mode"],
            fixed_num_epochs=config["trainer"]["epochs"],
        )
        print(f"Log file stored at {config.log_path}")

    # Report the location of the "best" checkpoint of the final seeded run (here
    # "best" corresponds to the model with the highest geometric mean over the
    # R@1, R@5 and R@10 metrics when a validation set is used, or simply the final
    # epoch of training for fixed-length schedules).
    print(f"The best performing ckpt can be found at {str(best_ckpt_path)}")
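# `set_seeds` is imported from a utilities module that is not shown here. A
# minimal sketch, assuming it mirrors the inline seeding done in main() below
# (the real helper may additionally seed CUDA):
import random
import numpy as np
import torch

def set_seeds(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)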
def main():
    args = parse_parameters()
    print('*' * 50)
    print('DeepLab_v3_plus <==> Prepare for Dataset <==> Begin')
    train_dataloader = prepare_for_train_dataloader(
        args.dataroot,
        bs_train=args.train_batchsize_per_gpu * args.gpu,
        shuffle=True,
        num_workers=args.j,
        check_dataloader=args.check_dataloader)
    val_dataloader = prepare_for_val_dataloader(args.dataroot,
                                                bs_val=args.test_batchsize,
                                                shuffle=False,
                                                num_workers=args.j)
    print('DeepLab_v3_plus <==> Prepare for Dataset <==> Done\n\n')
    # sys.exit(0)  # uncomment to stop after the dataloader check

    # network
    print('*' * 50)
    print('DeepLab_v3_plus <==> Prepare for Network <==> Begin')
    net = DeepLabv3_plus(nInputChannels=3, n_classes=21, os=16,
                         pretrained=True, _print=True)
    optimizer = torch.optim.SGD(net.parameters(),
                                lr=args.init_lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    criterion = cross_entropy2d
    net = nn.DataParallel(net, device_ids=range(args.gpu)) if args.gpu else net
    net = net.cuda()
    cudnn.benchmark = True

    print('DeepLab_v3_plus <==> Resume Network checkpoint <==> Begin')
    start_epoch = 0  # default when no checkpoint is resumed
    if args.resume and os.path.isfile(args.resume):
        checkpoint = torch.load(args.resume)
        start_epoch = checkpoint['epoch'] + 1
        net.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        print('DeepLab_v3_plus <==> Resume Network checkpoint, next epoch:{} <==> Begin'
              .format(start_epoch))
    print('DeepLab_v3_plus <==> Prepare for Network <==> Done\n\n')

    global_step = 0
    running_loss_tr = 0.0
    print('*' * 50)
    for epoch in range(start_epoch, args.max_epoches):
        # train
        lr, optimizer = adjust_learning_rate(args.init_lr,
                                             optimizer,
                                             epoch,
                                             args.max_epoches,
                                             gamma=0.9,
                                             decay_step=args.decay_every_epoches)
        train_deeplab_v3_plus(train_dataloader, net, criterion, optimizer, epoch,
                              global_step, running_loss_tr)

        if epoch % args.test_epoches == 0:
            evaluation(val_dataloader, net, epoch, save_dir=args.log_test_dir)

        if epoch % args.save_weights_epoches == 0:
            save_checkpoint(
                {
                    'epoch': epoch,
                    'state_dict': net.state_dict(),
                    'optimizer': optimizer.state_dict(),
                },
                weights_dir=args.weights)
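# `adjust_learning_rate` is defined elsewhere in the repo. A hedged sketch of
# one plausible implementation: given the gamma/decay_step arguments, step
# decay (scale the LR by gamma every decay_step epochs) fits the call
# signature, though DeepLab setups often use a "poly" schedule instead.
# Hypothetical, for illustration only; max_epoch is accepted but unused here.
def adjust_learning_rate(init_lr, optimizer, epoch, max_epoch, gamma=0.9, decay_step=30):
    lr = init_lr * (gamma ** (epoch // decay_step))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return lr, optimizer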
def Training():
    print('Training')

    # Hyper parameter
    print('Initializing hyper parameter')
    vis = visdom.Visdom()
    loss_window = vis.line(X=torch.zeros((1,)).cpu(),
                           Y=torch.zeros((1,)).cpu(),
                           opts=dict(xlabel='epoch',
                                     ylabel='Loss',
                                     title='Training Loss',
                                     legend=['Loss']))

    ## Get dataset
    print("Get dataset")
    loader = Generator()

    ## Get agent and model
    print('Get agent')
    if p.model_path == "":
        lane_agent = agent.Agent()
    else:
        lane_agent = agent.Agent()
        lane_agent.load_weights(1912, "tensor(0.9420)")

    ## Loop for training
    print('Training loop')
    step = 0
    sampling_list = None
    for epoch in range(p.n_epoch):
        lane_agent.training_mode()
        for inputs, target_lanes, target_h, test_image, data_list in loader.Generate(sampling_list):
            # training
            # util.visualize_points(inputs[0], target_lanes[0], target_h[0])
            print("epoch : " + str(epoch))
            print("step : " + str(step))
            loss_p = lane_agent.train(inputs, target_lanes, target_h, epoch,
                                      lane_agent, data_list)
            torch.cuda.synchronize()
            loss_p = loss_p.cpu().data

            if step % 50 == 0:
                vis.line(X=torch.ones((1, 1)).cpu() * int(step / 50),
                         Y=torch.Tensor([loss_p]).unsqueeze(0).cpu(),
                         win=loss_window,
                         update='append')
            if step % 100 == 0:
                lane_agent.save_model(int(step / 100), loss_p)
                testing(lane_agent, test_image, step, loss_p)
            step += 1

        sampling_list = copy.deepcopy(lane_agent.get_data_list())
        lane_agent.sample_reset()

        # evaluation
        if epoch >= 0 and epoch % 1 == 0:
            print("evaluation")
            lane_agent.evaluate_mode()
            th_list = [0.8]
            index = [3]
            lane_agent.save_model(int(step / 100), loss_p)

            for idx in index:
                print("generate result")
                test.evaluation(loader, lane_agent, index=idx,
                                name="test_result_" + str(epoch) + "_" + str(idx) + ".json")

        if int(step) > 700000:
            break
def main(config):
    logger = config.get_logger('train')
    expert_dims, raw_input_dims = compute_dims(config, logger)
    seeds = [int(x) for x in config._args.seeds.split(",")]

    for seed in seeds:
        # Set the random initial seeds
        tic = time.time()
        logger.info(f"Setting experiment random seed to {seed}")
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)

        data_loaders = config.init(
            name='data_loader',
            module=module_data,
            raw_input_dims=raw_input_dims,
            text_feat=config["experts"]["text_feat"],
            text_dim=config["experts"]["text_dim"],
        )

        model = config.init(
            name='arch',
            module=module_arch,
            expert_dims=expert_dims,
            text_dim=config["experts"]["text_dim"],
            disable_nan_checks=config["disable_nan_checks"],
        )
        logger.info(model)

        loss = config.init(name="loss", module=module_loss)
        metrics = [getattr(module_metric, met) for met in config['metrics']]
        trainable_params = filter(lambda p: p.requires_grad, model.parameters())
        optimizer = config.init('optimizer', torch.optim, trainable_params)
        lr_scheduler = config.init('lr_scheduler', torch.optim.lr_scheduler, optimizer)

        visualizer = config.init(
            name='visualizer',
            module=module_vis,
            exp_name=config._exper_name,
            log_dir=config._web_log_dir,
        )

        trainer = Trainer(
            model,
            loss,
            metrics,
            optimizer,
            config=config,
            data_loaders=data_loaders,
            lr_scheduler=lr_scheduler,
            mini_train=config._args.mini_train,
            disable_nan_checks=config["disable_nan_checks"],
            visualizer=visualizer,
            skip_first_n_saves=config["trainer"].get("skip_first_n_saves", 0),
            include_optim_in_ckpts=config["trainer"].get("include_optim_in_ckpts", False),
        )
        trainer.train()
        best_ckpt_path = config.save_dir / "trained_model.pth"
        duration = time.strftime('%Hh%Mm%Ss', time.gmtime(time.time() - tic))
        logger.info(f"Training took {duration}")

        # If the dataset supports separate validation/test splits, the training config
        # json should specify an `eval_config` entry with the path to the test
        # configuration
        if config._config.get("eval_config", False):
            eval_args = argparse.ArgumentParser()
            eval_args.add_argument("--config", default=config["eval_config"])
            eval_args.add_argument("--device", default=config._args.device)
            eval_args.add_argument("--resume", default=best_ckpt_path)
            eval_config = ConfigParser(eval_args, slave_mode=True)
            evaluation(eval_config, logger=logger)

    # If multiple runs were conducted, report relevant statistics
    if len(seeds) > 1:
        log_summary(
            logger=logger,
            log_path=config.log_path,
            eval_mode=config["eval_mode"],
            fixed_num_epochs=config["trainer"]["epochs"],
        )
        print(f"Log file stored at {config.log_path}")

    # Report the location of the "best" checkpoint of the final seeded run (here
    # "best" corresponds to the model with the highest geometric mean over the
    # R@1, R@5 and R@10 metrics when a validation set is used, or simply the final
    # epoch of training for fixed-length schedules).
    print(f"The best performing ckpt can be found at {str(best_ckpt_path)}")
def Training():
    print('Training')

    ####################################################################
    ## Hyper parameter
    ####################################################################
    print('Initializing hyper parameter')

    #########################################################################
    ## Get dataset
    #########################################################################
    print("Get dataset")
    loader = Generator()

    ##############################
    ## Get agent and model
    ##############################
    print('Get agent')
    if p.model_path == "":
        lane_agent = agent.Agent()
        lane_agent.load_weights(50, "tensor(0.2378)", False)  # quantized model
    else:
        lane_agent = agent.Agent()
        lane_agent.load_weights(50, "tensor(0.2378)")  # quantized model

    ##############################
    ## Check GPU
    ##############################
    print('Setup GPU mode')
    if torch.cuda.is_available():
        lane_agent.cuda()

    ##############################
    ## Loop for training
    ##############################
    print('Training loop')
    step = 0
    sampling_list = None
    for epoch in range(p.n_epoch):
        lane_agent.training_mode()
        for inputs, target_lanes, target_h, test_image, data_list in loader.Generate(sampling_list):
            # training
            print("epoch : " + str(epoch))
            print("step : " + str(step))
            loss_p = lane_agent.train(inputs, target_lanes, target_h, epoch,
                                      lane_agent, data_list)
            torch.cuda.synchronize()
            loss_p = loss_p.cpu().data
            step += 1

        sampling_list = copy.deepcopy(lane_agent.get_data_list())
        lane_agent.sample_reset()

        # evaluation
        if epoch >= 0 and epoch % 1 == 0:
            print("evaluation")
            lane_agent.evaluate_mode()
            th_list = [0.8]
            index = [3]
            lane_agent.save_model(int(step / 100), loss_p)

            for idx in index:
                print("generate result")
                test.evaluation(loader, lane_agent, index=idx,
                                name="./eval_res/test_result_" + str(epoch) + "_" + str(idx) + ".json")

            for idx in index:
                print("compute score")
                with open("./eval_res/eval_acc.txt", 'a') as make_file:
                    make_file.write("epoch : " + str(epoch) + " loss : " + str(loss_p.cpu().data))
                    make_file.write(
                        evaluation.LaneEval.bench_one_submit(
                            "./eval_res/test_result_" + str(epoch) + "_" + str(idx) + ".json",
                            "test_label.json"))
                    make_file.write("\n")

        if int(step) > 50000:
            break

        if epoch > 20:
            # Freeze quantizer parameters
            lane_agent.d_observer()
        if epoch > 20:
            # Freeze batch norm mean and variance estimates
            lane_agent.freeze_bn()
def Training():
    print('Training')

    ####################################################################
    ## Hyper parameter
    ####################################################################
    print('Initializing hyper parameter')

    vis = visdom.Visdom()
    loss_window = vis.line(X=torch.zeros((1,)).cpu(),
                           Y=torch.zeros((1,)).cpu(),
                           opts=dict(xlabel='epoch',
                                     ylabel='Loss',
                                     title='Training Loss',
                                     legend=['Loss']))

    #########################################################################
    ## Get dataset
    #########################################################################
    print("Get dataset")
    loader = Generator()

    ##############################
    ## Get agent and model
    ##############################
    print('Get agent')
    if p.model_path == "":
        lane_agent = agent.Agent()
    else:
        lane_agent = agent.Agent()
        lane_agent.load_weights(0, "tensor(1.3984)")

    ##############################
    ## Check GPU
    ##############################
    print('Setup GPU mode')
    if torch.cuda.is_available():
        lane_agent.cuda()
        # torch.backends.cudnn.benchmark = True

    ##############################
    ## Loop for training
    ##############################
    print('Training loop')
    step = 0
    sampling_list = None
    for epoch in range(p.n_epoch):
        lane_agent.training_mode()
        for inputs, target_lanes, target_h, test_image, data_list in loader.Generate(sampling_list):
            # util.visualize_points(inputs[0], target_lanes[0], target_h[0])
            # training
            print("epoch : " + str(epoch))
            print("step : " + str(step))
            loss_p = lane_agent.train(inputs, target_lanes, target_h, epoch,
                                      lane_agent, data_list)
            torch.cuda.synchronize()
            loss_p = loss_p.cpu().data

            if step % 50 == 0:
                vis.line(X=torch.ones((1, 1)).cpu() * int(step / 50),
                         Y=torch.Tensor([loss_p]).unsqueeze(0).cpu(),
                         win=loss_window,
                         update='append')
            if step % 100 == 0:
                lane_agent.save_model(int(step / 100), loss_p)
                testing(lane_agent, test_image, step, loss_p)
            step += 1

        sampling_list = copy.deepcopy(lane_agent.get_data_list())
        lane_agent.sample_reset()

        # evaluation
        if epoch % 1 == 0:
            print("evaluation")
            lane_agent.evaluate_mode()
            th_list = [0.9]
            index = [3]
            lane_agent.save_model(int(step / 100), loss_p)

            for idx in index:
                print("generate result")
                test.evaluation(loader, lane_agent, index=idx,
                                name="test_result_" + str(epoch) + "_" + str(idx) + ".json")
                name = "epoch_idx_" + str(epoch) + str(idx) + str(step / 100)
                os.system("sh /home/kym/research/autonomous_car_vision/lane_detection/code/"
                          "ITS/CuLane/evaluation_code/SCNN_Pytorch/utils/lane_evaluation/"
                          "CULane/Run.sh " + name)

        if int(step) > 700000:
            break