Example #1
def main(args):
    os.environ['CUDA_VISIBLE_DEVICES'] = '3'
    # create checkpoint dir
    if not isdir(args.checkpoint):
        mkdir_p(args.checkpoint)

    # create model
    model = network.__dict__[cfg.model](cfg.output_shape, cfg.num_class, pretrained = False)
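    # DataParallel replicates the model across all visible GPUs; .cuda() moves the parameters onto the default device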
    model = torch.nn.DataParallel(model).cuda()

    # define loss function (criterion) and optimizer
    criterion1 = torch.nn.MSELoss().cuda() # for Global loss
    criterion2 = torch.nn.MSELoss(reduction='none').cuda() # for refine loss (reduction='none' replaces the deprecated reduce=False)
    optimizer = torch.optim.Adam(model.parameters(),
                                lr = cfg.lr,
                                weight_decay=cfg.weight_decay)
    
    if args.resume:
        if isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            pretrained_dict = checkpoint['state_dict']
            model.load_state_dict(pretrained_dict)
            args.start_epoch = checkpoint['epoch']
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch']))
            logger = Logger(join(args.checkpoint, 'log.txt'), resume=True)
        else:
            # note: logger is never created in this branch, so resuming from a missing checkpoint fails later
            print("=> no checkpoint found at '{}'".format(args.resume))
    else:        
        logger = Logger(join(args.checkpoint, 'log.txt'))
        logger.set_names(['Epoch', 'LR', 'Train Loss'])

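    # let cuDNN benchmark convolution algorithms and cache the fastest one (input sizes are fixed here)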
    cudnn.benchmark = True
    print('    Total params: %.2fMB' % (sum(p.numel() for p in model.parameters())/(1024*1024)*4))  # size in MB assuming 4-byte (float32) parameters

    train_loader = torch.utils.data.DataLoader(
        MscocoMulti(cfg),
        batch_size=cfg.batch_size*args.num_gpus, shuffle=True,
        num_workers=args.workers, pin_memory=True) 

    for epoch in range(args.start_epoch, args.epochs):
        lr = adjust_learning_rate(optimizer, epoch, cfg.lr_dec_epoch, cfg.lr_gamma)
        print('\nEpoch: %d | LR: %.8f' % (epoch + 1, lr)) 

        # train for one epoch
        train_loss = train(train_loader, model, [criterion1, criterion2], optimizer)
        print('train_loss: ', train_loss)

        # append logger file
        logger.append([epoch + 1, lr, train_loss])

        save_model({
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'optimizer' : optimizer.state_dict(),
        }, checkpoint=args.checkpoint)

    logger.close()
Example #2
 def run_train(self):
     final_epoch = False
     for epoch in range(1, args.n_epochs + 1):
         misc.log(
             self.log_path, 'Elapsed Time: {}/{}\n'.format(
                 self.timer.measure(),
                 self.timer.measure(epoch / float(args.n_epochs))))
         if self.scheduler:
             lr = self.scheduler.get_lr()[0]
         else:
             lr = args.lr
         self.train(epoch)
         acc = self.evaluate()
         improvement = acc > self.best_acc
         self.best_acc = max(acc, self.best_acc)
         misc.log(
             self.log_path,
             'Best Accuracy: {} | Current Learning Rate: {}'.format(
                 np.round(self.best_acc, 5), np.round(lr, 5)))
         if epoch == args.n_epochs:
             final_epoch = True
         if args.save:
             misc.save_model(args=args,
                             model_name=self.model_name,
                             best_acc=self.best_acc,
                             stats=util.stats,
                             state={
                                 'epoch': epoch,
                                 'state_dict': self.net.state_dict(),
                                 'best_acc': self.best_acc,
                                 'optimizer': self.optimizer.state_dict()
                             },
                             improvement=improvement,
                             epoch=epoch,
                             final_epoch=final_epoch)
Example #3
def train():
    processes = []
    if os.path.isdir(args.log_dir):
        ans = input('{} exists\ncontinue and overwrite? y/n: '.format(
            args.log_dir))
        if ans == 'n':
            return

    logger.configure(dir=args.log_dir, format_strs=['stdout', 'log', 'csv'])
    logger.log(args)
    json.dump(vars(args), open(os.path.join(args.log_dir, 'params.json'), 'w'))

    torch.set_num_threads(2)

    start = time.time()
    policy_update_time, policy_forward_time = 0, 0
    step_time_env, step_time_total, step_time_rewarder = 0, 0, 0
    visualize_time = 0
    rewarder_fit_time = 0

    envs = ContextualEnvInterface(args)
    if args.look:
        looker = Looker(args.log_dir)

    actor_critic, agent = initialize_policy(envs)

    rollouts = RolloutStorage(args.num_steps, args.num_processes,
                              envs.obs_shape, envs.action_space,
                              actor_critic.recurrent_hidden_state_size)
    rollouts.to(args.device)

    def copy_obs_into_beginning_of_storage(obs):
        rollouts.obs[0].copy_(obs)

    for j in range(args.num_updates):

        # have to reset here to use updated rewarder to sample tasks
        obs = envs.reset()
        copy_obs_into_beginning_of_storage(obs)

        if args.use_linear_lr_decay:
            update_linear_schedule(agent.optimizer, j, args.num_updates,
                                   args.lr)

        if args.algo == 'ppo' and args.use_linear_clip_decay:
            agent.clip_param = args.clip_param * (1 - j / float(args.num_updates))

        log_marginal = 0
        lambda_log_s_given_z = 0

        for step in range(args.num_steps):
            # Sample actions
            policy_forward_start = time.time()
            with torch.no_grad():
                value, action, action_log_prob, recurrent_hidden_states = actor_critic.act(
                    rollouts.obs[step], rollouts.recurrent_hidden_states[step],
                    rollouts.masks[step])
            policy_forward_time += time.time() - policy_forward_start

            # Observe reward and next obs
            step_total_start = time.time()
            obs, reward, done, info = envs.step(action)
            step_time_total += time.time() - step_total_start
            step_time_env += info['step_time_env']
            step_time_rewarder += info['reward_time']
            if args.rewarder == 'unsupervised' and args.clusterer == 'vae':
                log_marginal += info['log_marginal'].sum().item()
                lambda_log_s_given_z += info['lambda_log_s_given_z'].sum().item()

            # If done then clean the history of observations.
            masks = torch.FloatTensor([[0.0] if done_ else [1.0]
                                       for done_ in done])
            rollouts.insert(obs, recurrent_hidden_states, action,
                            action_log_prob, value, reward, masks)

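        # every environment is expected to finish its trial exactly at num_steps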
        assert all(done)

        # policy update
        with torch.no_grad():
            next_value = actor_critic.get_value(
                rollouts.obs[-1], rollouts.recurrent_hidden_states[-1],
                rollouts.masks[-1]).detach()

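        # bootstrap returns from the value estimate of the final observation (GAE when args.use_gae is set)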
        rollouts.compute_returns(next_value, args.use_gae, args.gamma,
                                 args.tau)

        policy_update_start = time.time()
        if args.rewarder != 'supervised' and envs.rewarder.fit_counter == 0:
            value_loss, action_loss, dist_entropy = 0, 0, 0
        else:
            value_loss, action_loss, dist_entropy = agent.update(rollouts)
        policy_update_time += time.time() - policy_update_start
        rollouts.after_update()

        # metrics
        trajectories = envs.trajectories_current_update
        state_entropy = calculate_state_entropy(args, trajectories)

        return_avg = rollouts.rewards.sum() / args.trials_per_update
        reward_avg = return_avg / (args.trial_length * args.episode_length)
        log_marginal_avg = log_marginal / args.trials_per_update / (args.trial_length * args.episode_length)
        lambda_log_s_given_z_avg = lambda_log_s_given_z / args.trials_per_update / (args.trial_length * args.episode_length)

        num_steps = (j + 1) * args.num_steps * args.num_processes
        num_episodes = num_steps // args.episode_length
        num_trials = num_episodes // args.trial_length

        logger.logkv('state_entropy', state_entropy)
        logger.logkv('value_loss', value_loss)
        logger.logkv('action_loss', action_loss)
        logger.logkv('dist_entropy', dist_entropy)
        logger.logkv('return_avg', return_avg.item())
        logger.logkv('reward_avg', reward_avg.item())
        logger.logkv('steps', num_steps)
        logger.logkv('episodes', num_episodes)
        logger.logkv('trials', num_trials)
        logger.logkv('policy_updates', (j + 1))
        logger.logkv('time', time.time() - start)
        logger.logkv('policy_forward_time', policy_forward_time)
        logger.logkv('policy_update_time', policy_update_time)
        logger.logkv('step_time_rewarder', step_time_rewarder)
        logger.logkv('step_time_env', step_time_env)
        logger.logkv('step_time_total', step_time_total)
        logger.logkv('visualize_time', visualize_time)
        logger.logkv('rewarder_fit_time', rewarder_fit_time)
        if args.rewarder == 'unsupervised' and args.clusterer == 'vae':
            logger.logkv('log_marginal_avg', log_marginal_avg)
            logger.logkv('lambda_log_s_given_z_avg', lambda_log_s_given_z_avg)
        logger.dumpkvs()

        if (j % args.save_period == 0
                or j == args.num_updates - 1) and args.log_dir != '':
            save_model(args, actor_critic, envs, iteration=j)

        if j % args.rewarder_fit_period == 0:
            rewarder_fit_start = time.time()
            envs.fit_rewarder()
            rewarder_fit_time += time.time() - rewarder_fit_start

        if (j % args.vis_period == 0
                or j == args.num_updates - 1) and args.log_dir != '':
            visualize_start = time.time()
            if args.look:
                looker.look(iteration=j)
            if args.plot:
                p = Popen('python visualize.py --log-dir {}'.format(args.log_dir), shell=True)
                processes.append(p)
            visualize_time += time.time() - visualize_start
Example #4
def train():
    processes = []
    if os.path.isdir(args.log_dir):
        ans = input('{} exists\ncontinue and overwrite? y/n: '.format(args.log_dir))
        if ans == 'n':
            return

    logger.configure(dir=args.log_dir, format_strs=['stdout', 'log', 'csv'])
    logger.log(args)
    json.dump(vars(args), open(os.path.join(args.log_dir, 'params.json'), 'w'))

    torch.set_num_threads(2)

    start = time.time()
    policy_update_time, policy_forward_time = 0, 0
    step_time_env, step_time_total, step_time_rewarder = 0, 0, 0
    visualize_time = 0
    rewarder_fit_time = 0

    envs = RL2EnvInterface(args)
    if args.look:
        looker = Looker(args.log_dir)

    actor_critic = Policy(envs.obs_shape, envs.action_space,
                          base=RL2Base, base_kwargs={'recurrent': True,
                                                     'num_act_dim': envs.action_space.shape[0]})
    actor_critic.to(args.device)
    agent = algo.PPO(actor_critic, args.clip_param, args.ppo_epoch, args.num_mini_batch,
                     args.value_loss_coef, args.entropy_coef, lr=args.lr,
                     eps=args.eps,
                     max_grad_norm=args.max_grad_norm)

    rollouts = RolloutStorage(args.num_steps, args.num_processes,
                        envs.obs_shape, envs.action_space,
                        actor_critic.recurrent_hidden_state_size)
    rollouts.to(args.device)

    def copy_obs_into_beginning_of_storage(obs):
        obs_raw, obs_act, obs_rew, obs_flag = obs
        rollouts.obs[0].copy_(obs_raw)
        rollouts.obs_act[0].copy_(obs_act)
        rollouts.obs_rew[0].copy_(obs_rew)
        rollouts.obs_flag[0].copy_(obs_flag)

    for j in range(args.num_updates):
        obs = envs.reset()
        copy_obs_into_beginning_of_storage(obs)

        if args.use_linear_lr_decay:
            update_linear_schedule(agent.optimizer, j, args.num_updates, args.lr)

        if args.algo == 'ppo' and args.use_linear_clip_decay:
            agent.clip_param = args.clip_param * (1 - j / float(args.num_updates))

        episode_returns = [0 for i in range(args.trial_length)]
        episode_final_reward = [0 for i in range(args.trial_length)]
        i_episode = 0

        log_marginal = 0
        lambda_log_s_given_z = 0

        for step in range(args.num_steps):
            # Sample actions
            policy_forward_start = time.time()
            with torch.no_grad():
                value, action, action_log_prob, recurrent_hidden_states = actor_critic.act(
                        rollouts.get_obs(step),
                        rollouts.recurrent_hidden_states[step],
                        rollouts.masks[step])
            policy_forward_time += time.time() - policy_forward_start

            # Observe reward and next obs
            step_total_start = time.time()
            obs, reward, done, info = envs.step(action)
            step_time_total += time.time() - step_total_start
            step_time_env += info['step_time_env']
            step_time_rewarder += info['reward_time']
            log_marginal += info['log_marginal'].sum().item()
            lambda_log_s_given_z += info['lambda_log_s_given_z'].sum().item()

            episode_returns[i_episode] += reward.sum().item()
            if all(done['episode']):
                episode_final_reward[i_episode] += reward.sum().item()
                i_episode = (i_episode + 1) % args.trial_length

            # If done then clean the history of observations.
            masks = torch.FloatTensor([[0.0] if done_ else [1.0] for done_ in done['trial']])
            rollouts.insert(obs, recurrent_hidden_states, action, action_log_prob, value, reward, masks)

        assert all(done['trial'])

        with torch.no_grad():
            next_value = actor_critic.get_value(rollouts.get_obs(-1),
                                                rollouts.recurrent_hidden_states[-1],
                                                rollouts.masks[-1]).detach()

        rollouts.compute_returns(next_value, args.use_gae, args.gamma, args.tau)

        policy_update_start = time.time()
        if args.rewarder != 'supervised' and envs.rewarder.fit_counter == 0 and not args.vae_load:
            value_loss, action_loss, dist_entropy = 0, 0, 0
        else:
            value_loss, action_loss, dist_entropy = agent.update(rollouts)
        policy_update_time += time.time() - policy_update_start
        rollouts.after_update()

        # metrics
        trajectories_pre = envs.trajectories_pre_current_update
        state_entropy_pre = calculate_state_entropy(args, trajectories_pre)

        trajectories_post = envs.trajectories_post_current_update
        state_entropy_post = calculate_state_entropy(args, trajectories_post)

        return_avg = rollouts.rewards.sum() / args.trials_per_update
        reward_avg = return_avg / (args.trial_length * args.episode_length)
        log_marginal_avg = log_marginal / args.trials_per_update / (args.trial_length * args.episode_length)
        lambda_log_s_given_z_avg = lambda_log_s_given_z / args.trials_per_update / (args.trial_length * args.episode_length)

        num_steps = (j + 1) * args.num_steps * args.num_processes
        num_episodes = num_steps // args.episode_length
        num_trials = num_episodes // args.trial_length

        logger.logkv('state_entropy_pre', state_entropy_pre)
        logger.logkv('state_entropy_post', state_entropy_post)
        logger.logkv('value_loss', value_loss)
        logger.logkv('action_loss', action_loss)
        logger.logkv('dist_entropy', dist_entropy)
        logger.logkv('return_avg', return_avg.item())
        logger.logkv('reward_avg', reward_avg.item())
        logger.logkv('steps', num_steps)
        logger.logkv('episodes', num_episodes)
        logger.logkv('trials', num_trials)
        logger.logkv('policy_updates', (j + 1))
        logger.logkv('time', time.time() - start)
        logger.logkv('policy_forward_time', policy_forward_time)
        logger.logkv('policy_update_time', policy_update_time)
        logger.logkv('step_time_rewarder', step_time_rewarder)
        logger.logkv('step_time_env', step_time_env)
        logger.logkv('step_time_total', step_time_total)
        logger.logkv('visualize_time', visualize_time)
        logger.logkv('rewarder_fit_time', rewarder_fit_time)
        logger.logkv('log_marginal_avg', log_marginal_avg)
        logger.logkv('lambda_log_s_given_z_avg', lambda_log_s_given_z_avg)
        for i_episode in range(args.trial_length):
            logger.logkv('episode_return_avg_{}'.format(i_episode),
                         episode_returns[i_episode] / args.trials_per_update)
            logger.logkv('episode_final_reward_{}'.format(i_episode),
                         episode_final_reward[i_episode] / args.trials_per_update)

        if (j % args.save_period == 0 or j == args.num_updates - 1) and args.log_dir != '':
            save_model(args, actor_critic, envs, iteration=j)

        if not args.vae_freeze and j % args.rewarder_fit_period == 0:
            rewarder_fit_start = time.time()
            envs.fit_rewarder()
            rewarder_fit_time += time.time() - rewarder_fit_start

        if (j % args.vis_period == 0 or j == args.num_updates - 1) and args.log_dir != '':
            visualize_start = time.time()
            if args.look:
                eval_return_avg, eval_episode_returns, eval_episode_final_reward = looker.look(iteration=j)
                logger.logkv('eval_return_avg', eval_return_avg)
                for i_episode in range(args.trial_length):
                    logger.logkv('eval_episode_return_avg_{}'.format(i_episode),
                                 eval_episode_returns[i_episode] / args.trials_per_update)
                    logger.logkv('eval_episode_final_reward_{}'.format(i_episode),
                                 eval_episode_final_reward[i_episode] / args.trials_per_update)

            if args.plot:
                p = Popen('python visualize.py --log-dir {}'.format(args.log_dir), shell=True)
                processes.append(p)
            visualize_time += time.time() - visualize_start

        logger.dumpkvs()
Example #5
def train_model(model, dataloaders, optimizer, scheduler, num_epochs=1):
    since = time.time()
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    logger = Logger(join(opt.output, 'log.txt'))
    best_loss = 1e5 # set to some large enough value

    criterion = Criterion()
    data_dict = dict()
    for epoch in range(num_epochs):
        scheduler.step()  # note: in PyTorch >= 1.1 the scheduler should step after the optimizer has stepped for the epoch
        lr = scheduler.get_lr()[-1]
        print(f'Epoch: {epoch+1}/{num_epochs} LR: {lr:.3E}')
        data_dict['Epoch'] = epoch
        data_dict['LR'] = lr

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            # Iterate over data.
            batch_time  = AverageMeter()
            data_time   = AverageMeter()
            loss_meter  = AverageMeter()

            end = time.time()
            bar_name = 'Training' if phase=='train' else 'Testing '
            num_batch = len(dataloaders[phase])
            bar = Bar(bar_name, max=num_batch)
            # Iterate over data.
            for i,(inputs, targets) in enumerate(dataloaders[phase]):
                # measure data loading time
                data_time.update(time.time() - end)

                # move data to GPU
                inputs  = inputs.to(device).float()
                targets = targets.to(device).float()

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    loss, _ = criterion.eval(outputs, targets)

                # measure accuracy and record loss
                loss_meter.update(loss.item(), inputs.shape[0])
                # backward + optimize only if in training phase
                if phase == 'train':
                    loss.backward()
                    optimizer.step()

                # measure elapsed time
                batch_time.update(time.time() - end)
                end = time.time()

                # plot progress
                bar.suffix  = f'({i+1:04d}/{num_batch:04d}) Data: {data_time.val:.6f}s | Batch: {batch_time.val:.3f}s | Total: {bar.elapsed_td:} | ETA: {bar.eta_td:} | Loss: {loss_meter.avg:.4f}'
                bar.next()
            bar.finish()
            data_dict[f'{phase} Loss'] = loss_meter.avg

            # save last model 
            if phase == 'train':
                save_model(model, join(opt.output, 'last.pth'))
            else:
                is_best = data_dict['val Loss'] < best_loss
                if is_best:
                    best_loss = data_dict['val Loss']
                    save_model(model, join(opt.output, 'best.pth'))
        # update the log
        logger.update(data_dict)
Example #6
def main():
    args = parse_args()
    update_config(cfg_hrnet, args)

    # create checkpoint dir
    if not isdir(args.checkpoint):
        mkdir_p(args.checkpoint)

    # create model
    #print('networks.'+ cfg_hrnet.MODEL.NAME+'.get_pose_net')
    model = eval('models.' + cfg_hrnet.MODEL.NAME + '.get_pose_net')(
        cfg_hrnet, is_train=True)
    model = torch.nn.DataParallel(model, device_ids=args.gpus).cuda()

    # show net
    args.channels = 3
    args.height = cfg.data_shape[0]
    args.width = cfg.data_shape[1]
    #net_vision(model, args)

    # define loss function (criterion) and optimizer
    criterion = torch.nn.MSELoss(reduction='mean').cuda()

    #torch.optim.Adam
    optimizer = AdaBound(model.parameters(),
                         lr=cfg.lr,
                         weight_decay=cfg.weight_decay)

    if args.resume:
        if isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            pretrained_dict = checkpoint['state_dict']
            model.load_state_dict(pretrained_dict)
            args.start_epoch = checkpoint['epoch']
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
            logger = Logger(join(args.checkpoint, 'log.txt'), resume=True)
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
    else:
        logger = Logger(join(args.checkpoint, 'log.txt'))
        logger.set_names(['Epoch', 'LR', 'Train Loss'])

    cudnn.benchmark = True
    torch.backends.cudnn.enabled = True
    print('    Total params: %.2fMB' %
          (sum(p.numel() for p in model.parameters()) / (1024 * 1024) * 4))

    train_loader = torch.utils.data.DataLoader(
        #MscocoMulti(cfg),
        KPloader(cfg),
        batch_size=cfg.batch_size * len(args.gpus))
    #, shuffle=True,
    #num_workers=args.workers, pin_memory=True)

    #for i, (img, targets, valid) in enumerate(train_loader):
    #    print(i, img, targets, valid)

    for epoch in range(args.start_epoch, args.epochs):
        lr = adjust_learning_rate(optimizer, epoch, cfg.lr_dec_epoch,
                                  cfg.lr_gamma)
        print('\nEpoch: %d | LR: %.8f' % (epoch + 1, lr))

        # train for one epoch
        train_loss = train(train_loader, model, criterion, optimizer)
        print('train_loss: ', train_loss)

        # append logger file
        logger.append([epoch + 1, lr, train_loss])

        save_model(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
            },
            checkpoint=args.checkpoint)

    logger.close()
Example #7
def main(args):
    # import pdb; pdb.set_trace()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # device = torch.device("cpu")
    print(device)

    writer = SummaryWriter(cfg.tensorboard_path)
    # create checkpoint dir
    counter = 0
    if not isdir(args.checkpoint):
        mkdir_p(args.checkpoint)

    # create model
    model = network.__dict__[cfg.model](cfg.output_shape,
                                        cfg.num_class,
                                        pretrained=True)

    model = torch.nn.DataParallel(model).to(device)
    # model = model.to(device)

    # define loss function (criterion) and optimizer
    criterion_bce = torch.nn.BCELoss().to(device)
    criterion_abs = torch.nn.L1Loss().to(device)
    # criterion_abs = offset_loss().to(device)
    # criterion1 = torch.nn.MSELoss().to(device) # for Global loss
    # criterion2 = torch.nn.MSELoss(reduce=False).to(device) # for refine loss
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=cfg.lr,
                                 weight_decay=cfg.weight_decay)

    if args.resume:
        print(args.resume)
        checkpoint_file_resume = os.path.join(args.checkpoint,
                                              args.resume + '.pth.tar')
        if isfile(checkpoint_file_resume):
            print("=> loading checkpoint '{}'".format(checkpoint_file_resume))
            checkpoint = torch.load(checkpoint_file_resume)
            pretrained_dict = checkpoint['state_dict']
            model.load_state_dict(pretrained_dict)
            args.start_epoch = checkpoint['epoch']
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                checkpoint_file_resume, checkpoint['epoch']))
            logger = Logger(join(args.checkpoint, 'log.txt'), resume=True)
        else:
            print("=> no checkpoint found at '{}'".format(
                checkpoint_file_resume))
    else:
        logger = Logger(join(args.checkpoint, 'log.txt'))
        logger.set_names(['Epoch', 'LR', 'Train Loss'])

    cudnn.benchmark = True
    print('    Total params: %.2fMB' %
          (sum(p.numel() for p in model.parameters()) / (1024 * 1024) * 4))

    train_loader = torch.utils.data.DataLoader(MscocoMulti_double_only(cfg),
                                               batch_size=cfg.batch_size *
                                               args.num_gpus,
                                               shuffle=True,
                                               num_workers=args.workers,
                                               pin_memory=True)

    for epoch in range(args.start_epoch, args.epochs):
        lr = adjust_learning_rate(optimizer, epoch, cfg.lr_dec_epoch,
                                  cfg.lr_gamma)
        print('\nEpoch: %d | LR: %.8f' % (epoch + 1, lr))

        # train for one epoch
        train_loss, counter = train(train_loader, model,
                                    [criterion_abs, criterion_bce], writer,
                                    counter, optimizer, device)
        print('train_loss: ', train_loss)

        # append logger file
        logger.append([epoch + 1, lr, train_loss])

        save_model(
            {
                'epoch': epoch + 1,
                'info': cfg.info,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
            },
            checkpoint=args.checkpoint)

    writer.export_scalars_to_json("./test.json")
    writer.close()

    logger.close()
Example #8
def run():
    # Dataset
    transform = transforms.Compose([
        transforms.Resize(64),
        transforms.ToTensor(),
        transforms.Normalize((0.5, ), (0.5, ))
    ])

    dataset = datasets.MNIST('.', transform=transform, download=True)
    dataloader = data.DataLoader(dataset, batch_size=4)
    print("[INFO] Define DataLoader")

    # Define Model
    g = Generator()
    d = Discriminator()
    print("[INFO] Define Model")

    # optimizer, loss
    gan_loss = GANLoss()

    optim_G = optim.Adam(g.parameters(), lr=0.0002, betas=(0.5, 0.999))
    optim_D = optim.Adam(d.parameters(), lr=0.0002, betas=(0.5, 0.999))
    print('[INFO] Define optimizer and loss')

    # train
    num_epoch = 2

    print('[INFO] Start Training!!')
    for epoch in range(num_epoch):
        total_batch = len(dataloader)

        for idx, (image, _) in enumerate(dataloader):
            d.train()
            g.train()

            # generate fake images
            noise = torch.randn(4, 100, 1, 1)
            output_fake = g(noise)

            # Loss

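            # detach() keeps the discriminator loss from backpropagating into the generator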
            d_loss_fake = gan_loss(d(output_fake.detach()), False)
            d_loss_real = gan_loss(d(image), True)
            d_loss = (d_loss_fake + d_loss_real) / 2

            g_loss = gan_loss(d(output_fake), True)

            # update
            optim_G.zero_grad()
            g_loss.backward()
            optim_G.step()

            optim_D.zero_grad()
            d_loss.backward()
            optim_D.step()

            if ((epoch * total_batch) + idx) % 1000 == 0:
                print(
                    'Epoch [%d/%d], Iter [%d/%d], D_loss: %.4f, G_loss: %.4f' %
                    (epoch, num_epoch, idx + 1, total_batch, d_loss.item(),
                     g_loss.item()))

                save_model('model', 'GAN', g, {'loss': g_loss.item()})
Example #9
    def train(self):
        num_param_updates = 0
        loss_acc_since_last_log = 0.0
        param_updates_since_last_log = 0
        num_episodes = 0

        state = self.env.reset()[..., np.newaxis]
        for t in tqdm(range(self.total_timesteps)):
            last_idx = self.memory.store_frame(state)
            recent_observations = self.memory.encode_recent_observation()

            # Use an epsilon-greedy action once learning has started; otherwise act randomly
            if t > self.learning_start:
                action = self.select_epsilon_greedy_action(
                    recent_observations, t).item()
            else:
                action = random.randrange(self.num_actions)

            # Advance a step
            next_state, reward, done, _ = self.env.step(action)
            next_state = next_state[..., np.newaxis]

            # Store result in memory
            self.memory.store_effect(last_idx, action, reward, done)

            # Reset if done (life lost, due to atari wrapper)
            if done:
                next_state = self.env.reset()
                next_state = next_state[..., np.newaxis]
            state = next_state

            # Train network using experience replay when
            # memory is sufficiently large.
            if (t > self.learning_start and t % self.learning_freq == 0
                    and self.memory.can_sample(self.batch_size)):
                # Sample from replay buffer
                (
                    state_batch,
                    act_batch,
                    r_batch,
                    next_state_batch,
                    done_mask,
                ) = self.memory.sample(self.batch_size)
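                # convert frame batches to float tensors scaled to [0, 1] (frames are presumably stored as uint8)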
                state_batch = torch.from_numpy(state_batch).type(
                    self.dtype) / 255.0
                act_batch = torch.from_numpy(act_batch).long().to(self.device)
                r_batch = torch.from_numpy(r_batch).to(self.device)
                next_state_batch = (
                    torch.from_numpy(next_state_batch).type(self.dtype) /
                    255.0)
                not_done_mask = torch.from_numpy(1 - done_mask).type(
                    self.dtype)

                # Calculate current Q value
                current_Q_vals = self.Q(state_batch).gather(
                    1, act_batch.unsqueeze(1))

                # Calculate next Q value based on action that gives max Q vals
                next_max_Q = self.target_Q(next_state_batch).detach().max(
                    dim=1)[0]
                next_Q_vals = not_done_mask * next_max_Q

                # Calculate target of current Q values
                target_Q_vals = r_batch + (self.gamma * next_Q_vals)

                # Calculate loss and backprop
                loss = F.smooth_l1_loss(current_Q_vals.squeeze(),
                                        target_Q_vals)
                self.optimizer.zero_grad()
                loss.backward()
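                # clamp every gradient element to [-1, 1] to stabilize the Q-network update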
                for param in self.Q.parameters():
                    param.grad.data.clamp_(-1, 1)

                # Update weights
                self.optimizer.step()
                num_param_updates += 1

                # Store stats
                loss_acc_since_last_log += loss.item()
                param_updates_since_last_log += 1

                # Update target network periodically
                if num_param_updates % self.target_update_freq == 0:
                    self.target_Q.load_state_dict(self.Q.state_dict())

                # Save model checkpoint
                if num_param_updates % self.checkpoint_frequency == 0:
                    save_model_checkpoint(
                        self.Q,
                        self.optimizer,
                        t,
                        f"{self.out_dir}/checkpoints/{self.model_name}_{num_param_updates}",
                    )

                # Log progress
                if (num_param_updates % (self.log_freq // 2) == 0
                        and param_updates_since_last_log > 0):
                    self.writer.add_scalar(
                        "Mean Loss per Update (Updates)",
                        loss_acc_since_last_log / param_updates_since_last_log,
                        num_param_updates,
                    )
                    loss_acc_since_last_log = 0.0
                    param_updates_since_last_log = 0

                if num_param_updates % self.log_freq == 0:
                    wrapper = get_wrapper_by_name(self.env, "Monitor")
                    episode_rewards = wrapper.get_episode_rewards()
                    mean_reward = round(np.mean(episode_rewards[-101:-1]), 2)
                    sum_reward = np.sum(episode_rewards[-101:-1])
                    episode_lengths = wrapper.get_episode_lengths()
                    mean_duration = round(np.mean(episode_lengths[-101:-1]), 2)
                    sum_duration = np.sum(episode_lengths[-101:-1])

                    self.writer.add_scalar(
                        f"Mean Reward (epoch = {self.log_freq} updates)",
                        mean_reward,
                        num_param_updates // self.log_freq,
                    )
                    self.writer.add_scalar(
                        f"Mean Duration (epoch = {self.log_freq} updates)",
                        mean_duration,
                        num_param_updates // self.log_freq,
                    )
                    self.writer.add_scalar(
                        f"Mean Reward per Timestep (epoch = {self.log_freq} updates)",
                        round(sum_reward / sum_duration, 2),
                        num_param_updates // self.log_freq,
                    )

            if done:
                num_episodes += 1

        # Save model
        save_model(self.Q, f"{self.out_dir}/{self.model_name}.model")

        self.env.close()

        print(f"Number of Episodes: {num_episodes}")

        return self.Q
Example #10
            # generate fake images

            noise = torch.randn(4, 100, 1, 1)
            output_fake = g(noise, label)

            # Loss

            d_loss_fake = gan_loss(d(output_fake.detach(), label), False)
            d_loss_real = gan_loss(d(image, label), True)
            d_loss = (d_loss_fake + d_loss_real) / 2

            g_loss = gan_loss(d(output_fake, label), True)

            # update
            optim_G.zero_grad()
            g_loss.backward()
            optim_G.step()

            optim_D.zero_grad()
            d_loss.backward()
            optim_D.step()

            if ((epoch * total_batch) + idx) % 1000 == 0:
                print('Epoch [%d/%d], Iter [%d/%d], D_loss: %.4f, G_loss: %.4f'
                      % (epoch, num_epoch, idx + 1, total_batch, d_loss.item(), g_loss.item()))

                save_model('model', 'GAN', g, {'loss': g_loss.item()})
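
The save_model helper called in these examples is project-specific, and its signature differs between them (some take a state dict plus a checkpoint directory, others take a model and a path, or a name plus metadata). A minimal sketch of the state-dict variant used in Examples #1, #6 and #7 is shown below; the name, signature and default filename are assumptions for illustration, not the API of any of the repositories excerpted above.

import os

import torch


def save_model(state, checkpoint='checkpoint', filename='checkpoint.pth.tar'):
    # state is a dict such as {'epoch': ..., 'state_dict': ..., 'optimizer': ...}
    os.makedirs(checkpoint, exist_ok=True)
    torch.save(state, os.path.join(checkpoint, filename))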