def train_DA(epoch):
    """Run one training pass over the training set and return the mean loss.

    Every image batch is passed through ``random_mask_batch_one_sample``
    before the forward pass, so the network is optimised on masked inputs.
    The model is written to ``output_dir`` when ``epoch`` is a multiple of 100.

    Relies on module-level state: ``net``, ``lr``, ``device``, ``keep``,
    ``output_dir``, ``train_path``, ``train_gt_path``, ``method`` and
    ``dataset_name``.

    Args:
        epoch: Index of the current epoch; only used to decide whether to
            save and to name the saved file.

    Returns:
        The summed per-step loss divided by ``data_loader.get_num_samples()``.
    """
    net.train()
    # NOTE(review): a fresh Adam optimizer is built on every call, so its
    # moment estimates do not carry over from one epoch to the next. Kept
    # as-is because changing it would alter the function's contract.
    optimizer = torch.optim.Adam(
        filter(lambda p: p.requires_grad, net.parameters()), lr=lr)

    # exist_ok avoids the check-then-create race and handles nested paths.
    os.makedirs(output_dir, exist_ok=True)

    data_loader = ImageDataLoader(train_path, train_gt_path, shuffle=True,
                                  gt_downsample=True, pre_load=False)

    dtype = torch.FloatTensor
    train_loss = 0
    for blob in data_loader:
        # certified input
        im_data = torch.from_numpy(blob['data']).type(dtype).to(device)
        im_data = random_mask_batch_one_sample(im_data, keep, reuse_noise=True)

        gt_data = torch.from_numpy(blob['gt_density']).type(dtype).to(device)

        # The forward pass takes the target as well; the loss is read back
        # from the network afterwards.
        density_map = net(im_data, gt_data)
        zzk_loss = net.loss
        train_loss += zzk_loss.item()

        # Per-step counts are printed for monitoring only.
        gt_count = np.sum(gt_data.detach().cpu().numpy())
        et_count = np.sum(density_map.detach().cpu().numpy())
        print("gt_count: ", gt_count)
        print("et_count: ", et_count)

        optimizer.zero_grad()
        zzk_loss.backward()
        optimizer.step()

    train_loss = train_loss / data_loader.get_num_samples()

    if epoch % 100 == 0:
        save_name = os.path.join(
            output_dir, '{}_{}_{}.h5'.format(method, dataset_name, epoch))
        network.save_net(save_name, net)

    return train_loss
if re_cnt: t.tic() re_cnt = False iter_time += iter_timer.toc(average=False) load_timer.tic() duration = outer_timer.toc(average=False) logging.info("epoch {}: {} seconds; Path: {}".format( epoch, duration, opt.expr_dir)) logging.info("load/iter/cuda: {} vs {} vs {} seconds; iter: {}".format( load_time, iter_time, net.cudaTimer.tot_time, net.cudaTimer.calls)) net.cudaTimer.tot_time = 0 save_name = os.path.join(opt.expr_dir, '%06d.h5' % epoch) network.save_net(save_name, net) if scheduler != None: scheduler.step() logging.info(scheduler.get_lr()) logging.info("Train loss: {}".format( train_loss / data_loader_train.get_num_samples())) if opt.use_tensorboard: try: vis_exp.add_scalar_value('train_loss', train_loss / data_loader_train.get_num_samples(), step=epoch) except:
def _self_train_session(net, test_loader, optimizer, num_epochs):
    """Fine-tune ``net`` on one session using its own predictions as targets."""
    step_cnt = 0
    t = Timer()
    t.tic()
    for epoch in range(1, num_epochs + 1):
        for step, blob in enumerate(test_loader):
            im_data = blob['data']

            # First pass: produce the pseudo ground truth with the training
            # flag switched off on the top-level module.
            net.training = False
            gt_data = net(im_data)
            gt_data = gt_data.cpu().detach().numpy()

            # Second pass: train against that pseudo ground truth.
            net.training = True
            density_map = net(im_data, gt_data)
            loss = net.loss
            step_cnt += 1
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if step % disp_interval == 0:
                duration = t.toc(average=False)
                fps = step_cnt / duration
                gt_count = np.sum(gt_data)
                density_map = density_map.data.cpu().numpy()
                et_count = np.sum(density_map)
                utils.save_results(im_data, gt_data, density_map, args.SAVE_ROOT)
                log_text = 'epoch: %4d, step %4d, Time: %.4fs, gt_cnt: %4.1f, et_cnt: %4.1f' % (epoch, step, 1./fps, gt_count, et_count)
                log_print(log_text, color='green', attrs=['bold'])
                # Restart the timer AND the step counter together so the
                # reported time per step covers only the latest interval.
                # NOTE(review): assumes Timer.toc(average=False) returns the
                # time elapsed since the last tic() - confirm.
                step_cnt = 0
                t.tic()


def _export_density_maps(net, test_path, output_prefix):
    """Predict every test image and write each density map to a CSV file."""
    import os

    # np.savetxt does not create missing directories.
    os.makedirs('./densitymaps/', exist_ok=True)

    all_test_loader = ImageDataLoader(test_path, None, 'test_split', shuffle=False, gt_downsample=True, pre_load=True, Dataset=args.Dataset)
    for blob in all_test_loader:
        im_data = blob['data']
        net.training = False
        density_map = net(im_data)
        density_map = density_map.data.cpu().numpy()
        # Keeps only the last two axes; this requires the two leading axes
        # to have size 1, otherwise reshape raises.
        new_dm = density_map.reshape([density_map.shape[2], density_map.shape[3]])
        np.savetxt(output_prefix + '_output_' + blob['fname'].split('.')[0] + '.csv', new_dm, delimiter=',', fmt='%.6f')


def test(net,test_path,optimizer, num_epochs, Dataset=args.Dataset):
    """Self-train ``net`` session by session on the test split and export predictions.

    The test frames are cut into sessions of 100 frames (the last session
    ends at the final test frame). For each session the network is
    fine-tuned for ``num_epochs`` epochs on its own predictions, the model
    is saved under ``args.SAVE_ROOT``, and density maps for the whole test
    split are written as CSV files under ``./densitymaps/``.

    Args:
        net: The network to fine-tune; it is modified in place.
        test_path: Location of the test data handed to the data loaders.
        optimizer: Optimizer already bound to ``net``'s parameters.
        num_epochs: Number of self-training epochs per session.
        Dataset: ``"fdst"`` selects frames 451-750; anything else selects
            frames 1201-2000.

    Returns:
        The fine-tuned ``net``.
    """
    # NOTE(review): the frame limits follow the ``Dataset`` argument, while
    # loaders and output paths read ``args.Dataset``. They agree for the
    # default value; confirm whether they should ever differ.
    if Dataset == "fdst":
        test_len, low_limit, high_limit = 750, 451, 750
    else:
        test_len, low_limit, high_limit = 2000, 1201, 2000

    ses_size = 100
    sessions_list = list(range(low_limit, high_limit, ses_size))
    sessions_list.append(test_len)

    for start_frame, end_frame in zip(sessions_list, sessions_list[1:]):
        test_loader = ImageDataLoader_Val_Test(test_path, None, 'test_split', start_frame, end_frame, shuffle=False, gt_downsample=True, pre_load=True, Dataset=args.Dataset)

        # Nothing in this function writes to the handle; opening it creates
        # or truncates the per-dataset log file. The context manager closes
        # it, which the original never did.
        with open(args.SAVE_ROOT + "/" + args.Dataset + "_test.log", "w", 1):
            log_print("test/Self Training ....", color='green', attrs=['bold'])
            _self_train_session(net, test_loader, optimizer, num_epochs)

        torch.backends.cudnn.enabled = True
        torch.backends.cudnn.benchmark = False

        session = str(start_frame)
        network.save_net(args.SAVE_ROOT + '/' + args.Dataset + session + '_self_trained_model_test.h5', net)

        output_dir = './densitymaps/' + args.Dataset + session
        net.cuda()
        net.eval()
        _export_density_maps(net, test_path, output_dir)

    return net
# Move the network to the GPU and build an optimizer over its trainable
# parameters only.
net.cuda()
net.train()

params = list(net.parameters())
optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, net.parameters()), lr=args.learning_rate)

# exist_ok avoids the check-then-create race and handles nested SAVE_ROOT paths.
os.makedirs(args.SAVE_ROOT, exist_ok=True)

# start training
# train, validation/self training and testing model
# Each stage runs when args.MODE names it or is 'all'; the same net and
# optimizer are passed from one stage to the next, and the model is saved
# after every stage.
if args.MODE in ('all', 'train'):
    data_loader_train = ImageDataLoader(train_path, train_gt_path, 'train_split', shuffle=False, gt_downsample=True, pre_load=True, Dataset=args.Dataset)
    net = train(net, data_loader_train, optimizer, args.MAX_EPOCHS)
    network.save_net(args.SAVE_ROOT + '/' + args.Dataset + '_trained_model.h5', net)

if args.MODE in ('all', 'val'):
    net = val(net, val_path, optimizer, args.VAL_EPOCHS, Dataset=args.Dataset)
    network.save_net(args.SAVE_ROOT + '/' + args.Dataset + '_self_trained_model_val.h5', net)

if args.MODE in ('all', 'test'):
    # The test stage reuses args.VAL_EPOCHS as its self-training epoch count.
    net = test(net, test_path, optimizer, args.VAL_EPOCHS, Dataset=args.Dataset)
    network.save_net(args.SAVE_ROOT + '/' + args.Dataset + '_self_trained_model_test.h5', net)

#if args.MODE == 'eval_all' or args.MODE == 'eval_val':
#    eval_val(net, val_path)
#if args.MODE == 'eval_all' or args.MODE == 'eval_test':
if train_batch_size > 1: print( 'epoch: %4d, step %6d, ground truth: %6.1f, estimate: %6.1f and etc' % (epoch, step, ground_truth_count, estimate_count), flush=True) else: print( 'epoch: %4d, step %6d, ground truth: %6.1f, estimate: %6.1f' % (epoch, step, ground_truth_count, estimate_count), flush=True) if is_save_model_in_epoch and number_of_train_samples % steps_to_save_model == 0: model_name = '{}_{}_{}.h5'.format(original_dataset_name, epoch, step) save_model_path = os.path.join(output_dir, model_name) network.save_net(save_model_path, net) # evaluate the model of this epoch evaluate_result_dict = dict() for data_name in best_result_dict: evaluate_data = all_data[data_name] result = evaluate_model(save_model_path, evaluate_data['data']) evaluate_result_dict[data_name] = result txt_log_info.append( 'evaluate %s on %s: mae: %6.2f, mse: %6.2f, psnr: %6.2f, ssim: %6.2f, game: %6.2f, %6.2f, %6.2f, %6.2f' % (result['name'], data_name, result['mae'], result['mse'], result['psnr'], result['ssim'], result['game_0'], result['game_1'], result['game_2'], result['game_3']))
def main():
    """Train the crowd counter, checkpointing and validating after every epoch.

    Per epoch this function:

    1. runs one optimisation pass over the training loader,
    2. saves a checkpoint (epoch, model state, optimizer state),
    3. evaluates MAE/MSE on the training and validation loaders with
       ``evaluate_network`` and appends them to ``train_loss.csv`` and
       ``val_loss.csv`` in ``log_dir``,
    4. saves the model as ``.h5`` when the validation MAE improves, or when
       it ties and the validation MSE improves.

    All paths and hyper-parameters are hard-coded below. With
    ``CONTINUE_TRAIN`` set, the network, optimizer and starting epoch are
    restored from ``checkpointfile`` first.
    """
    # define output folder
    output_dir = './saved_models/'
    log_dir = './mae_mse/'
    checkpoint_dir = './checkpoint/'

    train_path = '/home/jake/Desktop/Projects/Python/dataset/SH_B/cooked/train/images'
    train_gt_path = '/home/jake/Desktop/Projects/Python/dataset/SH_B/cooked/train/ground_truth'
    val_path = '/home/jake/Desktop/Projects/Python/dataset/SH_B/cooked/val/images'
    val_gt_path = '/home/jake/Desktop/Projects/Python/dataset/SH_B/cooked/val/ground_truth'

    # last checkpoint
    checkpointfile = os.path.join(checkpoint_dir, 'checkpoint.94.pth.tar')

    # some description
    method = 'mcnn'
    dataset_name = 'SH_B'

    # Training configuration
    start_epoch = 0
    end_epoch = 97
    lr = 0.00001
    disp_interval = 1000

    # Flag
    CONTINUE_TRAIN = True

    # TensorBoard (Crayon) logging is disabled in this script.

    rand_seed = 64678
    if rand_seed is not None:
        np.random.seed(rand_seed)
        torch.manual_seed(rand_seed)
        torch.cuda.manual_seed(rand_seed)

    # All three directories are written to below; create them up front so a
    # fresh checkout does not fail on the first open() or torch.save().
    for directory in (output_dir, log_dir, checkpoint_dir):
        os.makedirs(directory, exist_ok=True)

    # Define network
    net = CrowdCounter()
    network.weights_normal_init(net, dev=0.01)
    # net.cuda()
    net.train()
    optimizer = torch.optim.Adam(
        filter(lambda p: p.requires_grad, net.parameters()), lr=lr)

    # training param
    if CONTINUE_TRAIN:
        net, optimizer, start_epoch = utils.load_checkpoint(
            net, optimizer, filename=checkpointfile)

    step_cnt = 0
    t = Timer()
    t.tic()

    # Load data
    data_loader = ImageDataLoader(
        train_path, train_gt_path, shuffle=True, gt_downsample=True, pre_load=True)
    data_loader_val = ImageDataLoader(
        val_path, val_gt_path, shuffle=False, gt_downsample=True, pre_load=True)

    # Initialise every "best so far" value so the tie-break comparison and
    # the summary log line can never read an unassigned name (e.g. when the
    # first validation MAE is NaN and no comparison succeeds).
    best_mae = sys.maxsize
    best_mse = sys.maxsize
    best_model = None

    # The context manager closes both CSV logs even if training raises.
    with open(os.path.join(log_dir, "train_loss.csv"), "a+") as f_train_loss, \
            open(os.path.join(log_dir, "val_loss.csv"), "a+") as f_val_loss:
        # Start training
        # NOTE(review): the upper bound is end_epoch - 1, so the last epoch
        # run is end_epoch - 2. Kept as in the original; confirm intent.
        for this_epoch in range(start_epoch, end_epoch - 1):
            for step, blob in enumerate(data_loader):
                img_data = blob['data']
                gt_data = blob['gt_density']
                et_data = net(img_data, gt_data)
                loss = net.loss
                step_cnt += 1
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                if step % disp_interval == 0:
                    duration = t.toc(average=False)
                    fps = step_cnt / duration
                    gt_count = np.sum(gt_data)
                    et_data = et_data.data.cpu().numpy()
                    et_count = np.sum(et_data)
                    utils.save_results(img_data, gt_data, et_data, output_dir,
                                       fname="{}.{}.png".format(this_epoch, step))
                    log_text = 'epoch: %4d, step %4d, Time: %.4fs, gt_cnt: %4.1f, et_cnt: %4.1f' % (this_epoch, step, 1./fps, gt_count, et_count)
                    log_print(log_text, color='green', attrs=['bold'])
                    # Restart the timer AND the step counter together so the
                    # reported time per step covers only the latest interval.
                    # NOTE(review): assumes Timer.toc(average=False) returns
                    # the time elapsed since the last tic() - confirm.
                    step_cnt = 0
                    t.tic()

            # Save checkpoint
            state = {'epoch': this_epoch, 'state_dict': net.state_dict(),
                     'optimizer': optimizer.state_dict()}
            cp_filename = "checkpoint.{}.pth.tar".format(this_epoch)
            torch.save(state, os.path.join(checkpoint_dir, cp_filename))

            # ---- end of epoch: error on the training set ----
            train_mae, train_mse = evaluate_network(net, data_loader)
            f_train_loss.write("{},{}\n".format(train_mae, train_mse))
            log_text = 'TRAINING - EPOCH: %d, MAE: %.1f, MSE: %0.1f' % (
                this_epoch, train_mae, train_mse)
            log_print(log_text, color='green', attrs=['bold'])

            # ---- validation: error on the validation set ----
            val_mae, val_mse = evaluate_network(net, data_loader_val)
            f_val_loss.write("{},{}\n".format(val_mae, val_mse))
            log_text = 'VALIDATION - EPOCH: %d, MAE: %.1f, MSE: %0.1f' % (
                this_epoch, val_mae, val_mse)
            log_print(log_text, color='green', attrs=['bold'])

            # SAVE model: a strictly lower MAE wins; an equal MAE wins only
            # with a strictly lower MSE.
            is_save = val_mae < best_mae or (
                val_mae == best_mae and val_mse < best_mse)
            if is_save:
                best_mae = val_mae
                best_mse = val_mse
                best_model = '{}_{}_{}.h5'.format(
                    method, dataset_name, this_epoch)
                network.save_net(os.path.join(output_dir, best_model), net)

            log_text = 'BEST MAE: %0.1f, BEST MSE: %0.1f, BEST MODEL: %s' % (
                best_mae, best_mse, best_model)
            log_print(log_text, color='green', attrs=['bold'])