def train(train_loader, model, criterion, optimizer, epoch, log, tf_writer):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()

    # Enable/disable partial BatchNorm freezing; `model` may or may not be
    # wrapped in DataParallel, hence the attribute fallback.
    if args.no_partialbn:
        try:
            model.module.partialBN(False)
        except AttributeError:
            model.partialBN(False)
    else:
        try:
            model.module.partialBN(True)
        except AttributeError:
            model.partialBN(True)

    model.train()

    end = time.time()
    for idx, (input, target) in enumerate(train_loader):
        data_time.update(time.time() - end)

        input, target = input.cuda(), target.cuda()
        output = model(input)
        loss = criterion(output, target)

        # accuracy and loss
        prec1, = accuracy(output.data, target, topk=(1, ))
        losses.update(loss.item(), input.size(0))
        top1.update(prec1.item(), input.size(0))

        # Accumulate gradients and step the optimizer every
        # `args.update_weight` mini-batches.
        loss.backward()
        if (idx + 1) % args.update_weight == 0:
            optimizer.step()
            optimizer.zero_grad()

        # time
        batch_time.update(time.time() - end)
        end = time.time()

        if (idx + 1) % args.print_freq == 0:
            output = ('Train: epoch-{0} ({1}/{2})\t'
                      'batch_time {batch_time.avg:.2f}\t\t'
                      'data_time {data_time.avg:.2f}\t\t'
                      'loss {loss.avg:.3f}\t'
                      'prec@1 {top1.avg:.2f}\t'.format(
                          epoch, idx + 1, len(train_loader),
                          batch_time=batch_time, data_time=data_time,
                          loss=losses, top1=top1))
            batch_time.reset()
            data_time.reset()
            losses.reset()
            top1.reset()
            print(output)
            log.write(output + '\n')
            log.flush()

    # Note: the meters were reset at the last print above, so these averages
    # cover only the tail of the epoch since that reset.
    tf_writer.add_scalar('loss/train', losses.avg, epoch)
    tf_writer.add_scalar('acc/train_top1', top1.avg, epoch)
    tf_writer.add_scalar('lr', optimizer.param_groups[-1]['lr'], epoch)
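
# `train` above relies on two helpers that are not defined in this file.
# The sketches below are assumptions modeled on the conventional
# pytorch/examples utilities, not necessarily this project's own versions.
class AverageMeter(object):
    """Tracks the current value, running sum, count, and average."""

    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.sum = 0
        self.count = 0
        self.avg = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        # Guard against n=0 updates (main() passes n = number of finished
        # episodes, which can be zero right after a reset).
        self.avg = self.sum / self.count if self.count else 0


def accuracy(output, target, topk=(1, )):
    """Computes precision@k for the specified values of k."""
    maxk = max(topk)
    batch_size = target.size(0)

    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        correct_k = correct[:k].reshape(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res
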
def main(args):
    print('[MAIN] Experiment {} start!'.format(args.exp_name))

    # define necessary variables
    torch.set_num_threads(1)
    feature_length = 800
    filepath = 'None'
    obs_shape = [800, 800, 800]
    num_class = 101
    log_file = "result/rl/" + args.exp_name + "_log.csv"
    num_updates = int(args.num_frames) // args.num_steps // args.num_processes
    with open(log_file, 'w') as f:
        f.write(
            'updates,num_timesteps,FPS,mean_reward,median_reward,min_reward,max_reward,entropy,value_loss,policy_loss,clf_loss,score,all_top1,all_top5\n'
        )

    # define classifier
    i3d_model_checkpoint = "result/0804_1708_e2e_ucf_model.pth.tar"
    clf = Classifier(feature_length, num_class, isbn=False)
    clf = load_clf_from_i3d(clf, i3d_model_checkpoint)
    clf = torch.nn.DataParallel(
        clf, device_ids=list(range(torch.cuda.device_count()))).cuda()
    # clf_criterion = torch.nn.CrossEntropyLoss().cuda()
    # clf_optimizer = torch.optim.Adam(clf.parameters(), lr=args.lr)

    # define datasets
    train_dataset = FeatureDataset('features/thumos14/val/data.csv')
    eval_dataset = FeatureDataset(
        'features/thumos14/test/data.csv',
        is_thumos14_test_folder=True)  # eval detection

    # define environments
    fuser = Fuser(fuse_type='average')
    envs = []
    for i in range(args.num_processes):
        print("[MAIN]\tBegin preparing the {}th env!".format(i))
        envs.append(
            make_env(dataset=train_dataset,
                     classifier=clf,
                     fuser=fuser,
                     observation_space=obs_shape,
                     index=int(i),
                     threshold=0.4))
    if args.num_processes > 1:
        envs = SubprocVecEnv(envs)
    else:
        envs = DummyVecEnv(envs)
    envs = VecNormalize(envs, ob=False, ret=False, gamma=args.gamma)

    # define actor
    actor_critic = Policy(obs_shape, envs.action_space, output_size=256)
    if args.cuda:
        actor_critic.cuda()

    # define the actor's update algorithm
    if args.algo == 'a2c':
        agent = A2C_ACKTR(actor_critic,
                          args.value_loss_coef,
                          args.entropy_coef,
                          lr=args.lr,
                          eps=args.eps,
                          alpha=args.alpha,
                          max_grad_norm=args.max_grad_norm)
    elif args.algo == 'ppo':
        agent = PPO(actor_critic,
                    args.clip_param,
                    args.ppo_epoch,
                    args.num_mini_batch,
                    args.value_loss_coef,
                    args.entropy_coef,
                    lr=args.lr,
                    eps=args.eps,
                    max_grad_norm=args.max_grad_norm)
    elif args.algo == 'acktr':
        agent = A2C_ACKTR(actor_critic,
                          args.value_loss_coef,
                          args.entropy_coef,
                          acktr=True)

    # prepare rollouts/observations
    rollouts = RolloutStorage(args.num_steps, args.num_processes,
                              (sum(obs_shape), ), envs.action_space, 1)
    current_obs = torch.zeros(args.num_processes, sum(obs_shape))

    def update_current_obs(obs, current_obs):
        print(envs.observation_space.shape)  # debug
        shape_dim0 = envs.observation_space.shape[0]
        obs = torch.from_numpy(obs).float()
        current_obs[:, -shape_dim0:] = obs
        return current_obs

    obs = envs.reset()
    current_obs = update_current_obs(obs, current_obs)
    rollouts.observations[0].copy_(current_obs)

    if args.cuda:
        current_obs = current_obs.cuda()
        rollouts.cuda()

    # These variables are used to log training.
    episode_rewards = torch.zeros([args.num_processes, 1])
    final_rewards = torch.zeros([args.num_processes, 1])
    score = AverageMeter()
    avg_prop_length = AverageMeter()
    start = time.time()
    top1 = top5 = -1

    # start training
    for j in range(num_updates):
        score.reset()
        if j == 10:  # debug: stop early after 10 updates
            break
        for step in range(args.num_steps):
            # Sample actions
            with torch.no_grad():
                value, action, action_log_prob, states = actor_critic.act(
                    rollouts.observations[step], rollouts.states[step],
                    rollouts.masks[step])
            cpu_actions = action.squeeze(1).cpu().numpy()

            # Here is the step!
            obs, reward, done, info = envs.step(cpu_actions)
            print(
                "[MAIN]\tIn update {}, step {}, start_frame {}, end_frame {}, total_frame {}, action {}, reward {}, prop_s {}, start_s {}, end_s {}".format(
                    j, step,
                    [i['start_frame'] for i in info],
                    [i['end_frame'] for i in info],
                    [i['max_frame'] * 16 + 15 for i in info],
                    cpu_actions, reward,
                    [i['proposal_score'] for i in info],
                    [i['start_score'] for i in info],
                    [i['end_score'] for i in info]))
            reward = torch.from_numpy(np.expand_dims(np.stack(reward),
                                                     1)).float()
            episode_rewards += reward
            label = torch.from_numpy(
                np.expand_dims(np.stack([i['label'] for i in info]),
                               1)).float()

            # If done then clean the history of observations.
            masks = torch.FloatTensor([[0.0] if done_ else [1.0]
                                       for done_ in done])
            final_rewards *= masks
            final_rewards += (1 - masks) * episode_rewards
            episode_rewards *= masks

            # Track proposal score and proposal length over finished episodes.
            score.update(
                ((1 - masks.numpy()) *
                 np.array([i['proposal_score'] for i in info])).mean(),
                n=np.sum(1 - masks.numpy(), dtype=np.int32))
            avg_prop_length.update(
                np.mean((1 - masks.numpy()) *
                        np.array([i['start_frame'] - i['end_frame']
                                  for i in info])),
                n=np.sum(1 - masks.numpy(), dtype=np.int32))

            if args.cuda:
                masks = masks.cuda()
            if current_obs.dim() == 4:
                current_obs *= masks.unsqueeze(2).unsqueeze(2)
            elif current_obs.dim() == 2:
                current_obs *= masks
            else:
                current_obs *= masks.unsqueeze(2)
            update_current_obs(obs, current_obs)
            rollouts.insert(current_obs, states, action, action_log_prob,
                            value, reward, masks, label)

        with torch.no_grad():
            next_value = actor_critic.get_value(rollouts.observations[-1],
                                                rollouts.states[-1],
                                                rollouts.masks[-1]).detach()

        rollouts.compute_returns(next_value, args.use_gae, args.gamma,
                                 args.tau)
        value_loss, action_loss, dist_entropy = agent.update(rollouts)
        rollouts.after_update()

        clf_loss = 0
        # if j > 200:
        #     clf_loss = train_classifier(data=rollouts, model=clf,
        #                                 criterion=clf_criterion,
        #                                 optimizer=clf_optimizer)

        if j % args.save_interval == 0 and args.save_dir != "":
            save_path = os.path.join(args.save_dir, args.algo)
            try:
                os.makedirs(save_path)
            except OSError:
                pass
            state = {'updates': j + 1,
                     'state_dict': actor_critic.state_dict()}
            filepath = os.path.join(
                save_path,
                args.exp_name + "_up{:06d}_model.pth.tar".format(j + 1))
            torch.save(state, filepath)

        # if j % args.clf_test_interval == 0:
        #     top1, top5 = validate(val_loader=eval_loader, model=clf,
        #                           criterion=clf_criterion)

        if j % args.log_interval == 0:
            end = time.time()
            total_num_steps = (j + 1) * args.num_processes * args.num_steps
            print(
                "[MAIN]\tUpdates {}, num timesteps {}, FPS {}, mean/median reward {:.1f}/{:.1f}, min/max reward {:.1f}/{:.1f}, entropy {:.5f}, value loss {:.5f}, policy loss {:.5f}, score {:.5f}"
                .format(j, total_num_steps,
                        int(total_num_steps / (end - start)),
                        final_rewards.mean(), final_rewards.median(),
                        final_rewards.min(), final_rewards.max(),
                        dist_entropy, value_loss, action_loss, score.avg))
            if top1:
                print('[MAIN]\tCLF TEST RAN! Top1 {}, Top5 {}'.format(
                    top1, top5))
            with open(log_file, 'a') as f:
                f.write("{},{},{},{},{},{},{},{},{},{},{},{},{},{}\n".format(
                    j, total_num_steps, int(total_num_steps / (end - start)),
                    final_rewards.mean(), final_rewards.median(),
                    final_rewards.min(), final_rewards.max(), dist_entropy,
                    value_loss, action_loss, clf_loss, score.avg, top1,
                    top5))
            top1 = top5 = None

    return filepath
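
# A minimal entry-point sketch. The flags below are exactly the ones `main`
# and `train` read from `args`; the defaults are assumptions (common
# a2c/ppo/acktr research-code defaults), not this project's tuned values.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description='RL proposal training')
    parser.add_argument('--exp_name', default='debug')
    parser.add_argument('--algo', default='a2c',
                        choices=['a2c', 'ppo', 'acktr'])
    parser.add_argument('--lr', type=float, default=7e-4)
    parser.add_argument('--eps', type=float, default=1e-5)
    parser.add_argument('--alpha', type=float, default=0.99)
    parser.add_argument('--gamma', type=float, default=0.99)
    parser.add_argument('--tau', type=float, default=0.95)
    parser.add_argument('--use_gae', action='store_true')
    parser.add_argument('--entropy_coef', type=float, default=0.01)
    parser.add_argument('--value_loss_coef', type=float, default=0.5)
    parser.add_argument('--max_grad_norm', type=float, default=0.5)
    parser.add_argument('--clip_param', type=float, default=0.2)
    parser.add_argument('--ppo_epoch', type=int, default=4)
    parser.add_argument('--num_mini_batch', type=int, default=32)
    parser.add_argument('--num_processes', type=int, default=4)
    parser.add_argument('--num_steps', type=int, default=5)
    parser.add_argument('--num_frames', type=float, default=10e6)
    parser.add_argument('--save_interval', type=int, default=100)
    parser.add_argument('--save_dir', default='./trained_models/')
    parser.add_argument('--log_interval', type=int, default=10)
    parser.add_argument('--print_freq', type=int, default=20)
    parser.add_argument('--update_weight', type=int, default=1)
    parser.add_argument('--no_partialbn', action='store_true')
    parser.add_argument('--cuda', action='store_true')

    # `train` reads the module-level `args`, so bind it here before running.
    args = parser.parse_args()
    main(args)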