Example #1
def train(train_loader, model, criterion, optimizer, epoch, log, tf_writer):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    # Switch partial BN on/off; handle both DataParallel-wrapped and bare models.
    if args.no_partialbn:
        try:
            model.module.partialBN(False)
        except AttributeError:
            model.partialBN(False)
    else:
        try:
            model.module.partialBN(True)
        except AttributeError:
            model.partialBN(True)
    model.train()

    end = time.time()
    for idx, (input, target) in enumerate(train_loader):
        data_time.update(time.time() - end)
        input, target = input.cuda(), target.cuda()
        output = model(input)
        loss = criterion(output, target)

        # accuracy and loss
        prec1, = accuracy(output.data, target, topk=(1, ))
        losses.update(loss.item(), input.size(0))
        top1.update(prec1.item(), input.size(0))

        # accumulate gradients; update weights every args.update_weight iterations
        loss.backward()
        if (idx + 1) % args.update_weight == 0:
            optimizer.step()
            optimizer.zero_grad()

        # time
        batch_time.update(time.time() - end)
        end = time.time()
        if (idx + 1) % args.print_freq == 0:
            output = ('Train: epoch-{0} ({1}/{2})\t'
                      'batch_time {batch_time.avg:.2f}\t\t'
                      'data_time {data_time.avg:.2f}\t\t'
                      'loss {loss.avg:.3f}\t'
                      'prec@1 {top1.avg:.2f}\t'.format(epoch,
                                                       idx + 1,
                                                       len(train_loader),
                                                       batch_time=batch_time,
                                                       data_time=data_time,
                                                       loss=losses,
                                                       top1=top1))
            batch_time.reset()
            data_time.reset()
            losses.reset()
            top1.reset()
            print(output)
            log.write(output + '\n')
            log.flush()

    tf_writer.add_scalar('loss/train', losses.avg, epoch)
    tf_writer.add_scalar('acc/train_top1', top1.avg, epoch)
    tf_writer.add_scalar('lr', optimizer.param_groups[-1]['lr'], epoch)
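
The loop above uses gradient accumulation: loss.backward() runs on every mini-batch, but optimizer.step() and optimizer.zero_grad() only fire every args.update_weight iterations. A minimal, self-contained sketch of that pattern with a toy model and random data (accum_steps is an illustrative stand-in for args.update_weight, not a name from this code):

import torch
import torch.nn as nn

# Toy setup for the accumulation pattern used in train() above.
model = nn.Linear(8, 2)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
accum_steps = 4  # stand-in for args.update_weight

optimizer.zero_grad()
for idx in range(16):
    x = torch.randn(4, 8)
    y = torch.randint(0, 2, (4,))
    loss = criterion(model(x), y)
    loss.backward()                      # gradients keep accumulating across iterations
    if (idx + 1) % accum_steps == 0:
        optimizer.step()                 # apply the accumulated gradient
        optimizer.zero_grad()            # reset before the next accumulation window

Scaling the loss by 1 / accum_steps before backward() is a common variant that turns the accumulated gradient into an average rather than a sum; the original function does not do this.
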
Example #2
def main(args):
    print('[MAIN] Experiment {} start!'.format(args.exp_name))

    # define necessary variable
    torch.set_num_threads(1)
    feature_length = 800
    filepath = 'None'
    obs_shape = [800, 800, 800]
    num_class = 101
    log_file = "result/rl/" + args.exp_name + "_log.csv"
    num_updates = int(args.num_frames) // args.num_steps // args.num_processes
    with open(log_file, 'w') as f:
        f.write(
            'updates,num_timesteps,FPS,mean_reward,median_reward,min_reward,max_reward,entropy,value_loss,policy_loss,clf_loss,score,all_top1,all_top5\n'
        )

    # define classifier
    i3d_model_checkpoint = "result/0804_1708_e2e_ucf_model.pth.tar"
    clf = Classifier(feature_length, num_class, isbn=False)
    clf = load_clf_from_i3d(clf, i3d_model_checkpoint)
    clf = torch.nn.DataParallel(
        clf, device_ids=list(range(torch.cuda.device_count()))).cuda()

    # clf_criterion = torch.nn.CrossEntropyLoss().cuda()
    # clf_optimizer = torch.optim.Adam(clf.parameters(), lr=args.lr)

    # define dataset
    train_dataset = FeatureDataset('features/thumos14/val/data.csv')
    eval_dataset = FeatureDataset(
        'features/thumos14/test/data.csv',
        is_thumos14_test_folder=True)  # eval detection

    # define environment
    fuser = Fuser(fuse_type='average')
    envs = []
    for i in range(args.num_processes):
        print("[MAIN]\tBegin prepare the {}th env!".format(i))
        envs.append(
            make_env(dataset=train_dataset,
                     classifier=clf,
                     fuser=fuser,
                     observation_space=obs_shape,
                     index=int(i),
                     threshold=0.4))
    if args.num_processes > 1:
        envs = SubprocVecEnv(envs)
    else:
        envs = DummyVecEnv(envs)
    envs = VecNormalize(envs, ob=False, ret=False, gamma=args.gamma)

    # define actor
    actor_critic = Policy(obs_shape, envs.action_space, output_size=256)
    if args.cuda:
        actor_critic.cuda()

    # define actor's update algorithm
    if args.algo == 'a2c':
        agent = A2C_ACKTR(actor_critic,
                          args.value_loss_coef,
                          args.entropy_coef,
                          lr=args.lr,
                          eps=args.eps,
                          alpha=args.alpha,
                          max_grad_norm=args.max_grad_norm)
    elif args.algo == 'ppo':
        agent = PPO(actor_critic,
                    args.clip_param,
                    args.ppo_epoch,
                    args.num_mini_batch,
                    args.value_loss_coef,
                    args.entropy_coef,
                    lr=args.lr,
                    eps=args.eps,
                    max_grad_norm=args.max_grad_norm)
    elif args.algo == 'acktr':
        agent = A2C_ACKTR(actor_critic,
                          args.value_loss_coef,
                          args.entropy_coef,
                          acktr=True)
    else:
        raise ValueError('Unknown algo: {}'.format(args.algo))

    # prepare rollouts/observation
    rollouts = RolloutStorage(args.num_steps, args.num_processes,
                              (sum(obs_shape), ), envs.action_space, 1)
    current_obs = torch.zeros(args.num_processes, sum(obs_shape))

    def update_current_obs(obs, current_obs):
        # Write the newest observation into the trailing slots of the buffer (in place).
        print(envs.observation_space.shape)  # debug output
        shape_dim0 = envs.observation_space.shape[0]
        obs = torch.from_numpy(obs).float()
        current_obs[:, -shape_dim0:] = obs
        return current_obs

    obs = envs.reset()
    current_obs = update_current_obs(obs, current_obs)
    rollouts.observations[0].copy_(current_obs)
    if args.cuda:
        current_obs = current_obs.cuda()
        rollouts.cuda()

    # These variables are used to log training.
    episode_rewards = torch.zeros([args.num_processes, 1])
    final_rewards = torch.zeros([args.num_processes, 1])
    score = AverageMeter()
    avg_prop_length = AverageMeter()
    start = time.time()
    top1 = top5 = -1

    # start training
    for j in range(num_updates):
        score.reset()
        if j == 10:  # NOTE: debug early-exit; training stops after 10 updates
            break
        for step in range(args.num_steps):
            # Sample actions
            with torch.no_grad():
                value, action, action_log_prob, states = actor_critic.act(
                    rollouts.observations[step], rollouts.states[step],
                    rollouts.masks[step])
            cpu_actions = action.squeeze(1).cpu().numpy()

            # Here is the step!
            obs, reward, done, info = envs.step(cpu_actions)
            print(
                "[MAIN]\tIn update {}, step {}, start_frame {}, end_frame {}, total_frame {}, action {}, reward {}, prop_s {}, start_s {}, end_s {}".format(
                    j, step,
                    [i['start_frame'] for i in info],
                    [i['end_frame'] for i in info],
                    [i['max_frame'] * 16 + 15 for i in info],
                    cpu_actions, reward,
                    [i['proposal_score'] for i in info],
                    [i['start_score'] for i in info],
                    [i['end_score'] for i in info]))
            reward = torch.from_numpy(np.expand_dims(np.stack(reward),
                                                     1)).float()
            episode_rewards += reward
            label = torch.from_numpy(
                np.expand_dims(np.stack([i['label'] for i in info]),
                               1)).float()

            # If done then clean the history of observations.
            masks = torch.FloatTensor([[0.0] if done_ else [1.0]
                                       for done_ in done])
            final_rewards *= masks
            final_rewards += (1 - masks) * episode_rewards
            episode_rewards *= masks
            score.update(
                ((1 - masks.numpy()) *
                 np.array([i['proposal_score'] for i in info])).mean(),
                n=np.sum(1 - masks.numpy(), dtype=np.int32))
            avg_prop_length.update(np.mean(
                (1 - masks.numpy()) *
                np.array([i['start_frame'] - i['end_frame'] for i in info])),
                                   n=np.sum(1 - masks.numpy(), dtype=np.int32))
            if args.cuda:
                masks = masks.cuda()
            if current_obs.dim() == 4:
                current_obs *= masks.unsqueeze(2).unsqueeze(2)
            elif current_obs.dim() == 2:
                current_obs *= masks
            else:
                current_obs *= masks.unsqueeze(2)
            current_obs = update_current_obs(obs, current_obs)
            rollouts.insert(current_obs, states, action, action_log_prob,
                            value, reward, masks, label)

        with torch.no_grad():
            next_value = actor_critic.get_value(rollouts.observations[-1],
                                                rollouts.states[-1],
                                                rollouts.masks[-1]).detach()
        rollouts.compute_returns(next_value, args.use_gae, args.gamma,
                                 args.tau)
        value_loss, action_loss, dist_entropy = agent.update(rollouts)
        rollouts.after_update()
        clf_loss = 0
        # if j > 200:
        #     clf_loss = train_classifier(data=rollouts, model=clf, criterion=clf_criterion, optimizer=clf_optimizer)

        if j % args.save_interval == 0 and args.save_dir != "":
            save_path = os.path.join(args.save_dir, args.algo)
            try:
                os.makedirs(save_path)
            except OSError:
                pass

            state = {'updates': j + 1, 'state_dict': actor_critic.state_dict()}
            filepath = os.path.join(
                save_path,
                args.exp_name + "_up{:06d}_model.pth.tar".format(j + 1))
            torch.save(state, filepath)

        # if j % args.clf_test_interval == 0:
        #     top1, top5 = validate(val_loader=eval_loader, model=clf, criterion=clf_criterion)

        if j % args.log_interval == 0:
            end = time.time()
            total_num_steps = (j + 1) * args.num_processes * args.num_steps
            print(
                "[MAIN]\tUpdates {}, num timesteps {}, FPS {}, mean/median reward {:.1f}/{:.1f}, min/max reward {:.1f}/{:.1f}, entropy {:.5f}, value loss {:.5f}, policy loss {:.5f}, score {:.5f}"
                .format(j, total_num_steps,
                        int(total_num_steps / (end - start)),
                        final_rewards.mean(), final_rewards.median(),
                        final_rewards.min(), final_rewards.max(), dist_entropy,
                        value_loss, action_loss, score.avg))
            if top1 is not None:
                print('[MAIN]\tCLF TEST RAN! Top1 {}, Top5 {}'.format(
                    top1, top5))
            with open(log_file, 'a') as f:
                f.write("{},{},{},{},{},{},{},{},{},{},{},{},{},{}\n".format(
                    j, total_num_steps, int(total_num_steps / (end - start)),
                    final_rewards.mean(), final_rewards.median(),
                    final_rewards.min(), final_rewards.max(), dist_entropy,
                    value_loss, action_loss, clf_loss, score.avg, top1, top5))
            top1 = top5 = None
    return filepath
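
The episode_rewards / final_rewards / masks bookkeeping in the step loop is the standard vectorized-environment pattern: a mask of 0 marks a finished episode, so the running return is copied into final_rewards and then reset. A minimal sketch of just that bookkeeping with fabricated rewards and done flags (all names below are illustrative, not taken from this repository):

import torch

num_envs = 4
episode_rewards = torch.zeros(num_envs, 1)   # running return of the episode currently in progress
final_rewards = torch.zeros(num_envs, 1)     # return of the most recently finished episode

for step in range(5):
    reward = torch.ones(num_envs, 1)                 # pretend every env received reward 1
    done = [step == 2, False, step == 4, False]      # fabricated termination flags
    masks = torch.FloatTensor([[0.0] if d else [1.0] for d in done])

    episode_rewards += reward
    final_rewards *= masks                           # keep previous totals only where episodes continue
    final_rewards += (1 - masks) * episode_rewards   # copy totals of episodes that just finished
    episode_rewards *= masks                         # reset running sums for finished episodes

print(final_rewards.squeeze(1))  # env 0 finished at step 2 with return 3, env 2 at step 4 with return 5
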