def main():
    """Script entry point: configure the run, then execute the full training loop.

    The model is expected to return a dict of per-sample loss tensors when
    called with mode 'train'; every term is mean-reduced, summed, and
    backpropagated. A checkpoint is written once per epoch.
    """
    # Parse command-line options and push them into the global config.
    args = parse_args()
    cfg.set_args(args.gpu_ids, args.mode, args.continue_train)
    cudnn.benchmark = True

    # Build the data pipeline and the model/optimizer state.
    trainer = Trainer()
    trainer._make_batch_generator()
    trainer._make_model()

    for epoch in range(trainer.start_epoch, cfg.end_epoch):
        trainer.set_lr(epoch)
        trainer.tot_timer.tic()
        trainer.read_timer.tic()

        for itr, (inputs, targets, meta_info) in enumerate(trainer.batch_generator):
            trainer.read_timer.toc()
            trainer.gpu_timer.tic()

            # Forward pass: the model returns a dict of loss tensors.
            trainer.optimizer.zero_grad()
            raw_losses = trainer.model(inputs, targets, meta_info, 'train')
            losses = {name: value.mean() for name, value in raw_losses.items()}

            # Backward on the summed scalar losses, then update the weights.
            sum(losses[name] for name in losses).backward()
            trainer.optimizer.step()
            trainer.gpu_timer.toc()

            # Assemble one log line: progress, lr, timings, then each loss term.
            msg_parts = [
                'Epoch %d/%d itr %d/%d:' % (epoch, cfg.end_epoch, itr, trainer.itr_per_epoch),
                'lr: %g' % (trainer.get_lr()),
                'speed: %.2f(%.2fs r%.2f)s/itr' % (
                    trainer.tot_timer.average_time,
                    trainer.gpu_timer.average_time,
                    trainer.read_timer.average_time),
                '%.2fh/epoch' % (trainer.tot_timer.average_time / 3600. * trainer.itr_per_epoch),
            ]
            msg_parts += ['%s: %.4f' % ('loss_' + name, value.detach())
                          for name, value in losses.items()]
            trainer.logger.info(' '.join(msg_parts))

            trainer.tot_timer.toc()
            trainer.tot_timer.tic()
            trainer.read_timer.tic()

        # Checkpoint model and optimizer state once per epoch.
        trainer.save_model({
            'epoch': epoch,
            'network': trainer.model.state_dict(),
            'optimizer': trainer.optimizer.state_dict(),
        }, epoch)
def main():
    """Script entry point: train a root-localization model with a single coord loss.

    Each batch supplies the input image, a per-sample camera constant
    `k_value`, and the root-joint supervision; the model returns a per-sample
    coordinate loss that is mean-reduced before backprop. A checkpoint is
    written once per epoch.
    """
    # Parse command-line options and push them into the global config.
    args = parse_args()
    cfg.set_args(args.gpu_ids, args.continue_train)
    cudnn.fastest = True
    cudnn.benchmark = True

    # Build the data pipeline and the model/optimizer state.
    trainer = Trainer()
    trainer._make_batch_generator()
    trainer._make_model()

    for epoch in range(trainer.start_epoch, cfg.end_epoch):
        trainer.set_lr(epoch)
        trainer.tot_timer.tic()
        trainer.read_timer.tic()

        for itr, batch in enumerate(trainer.batch_generator):
            input_img, k_value, root_img, root_vis, joints_have_depth = batch
            trainer.read_timer.toc()
            trainer.gpu_timer.tic()

            # Forward: supervise the root-joint coordinate prediction.
            trainer.optimizer.zero_grad()
            target = {'coord': root_img, 'vis': root_vis, 'have_depth': joints_have_depth}
            loss_coord = trainer.model(input_img, k_value, target).mean()

            # Backward and parameter update.
            loss_coord.backward()
            trainer.optimizer.step()
            trainer.gpu_timer.toc()

            # One log line: progress, lr, timings, and the coord loss.
            screen = [
                'Epoch %d/%d itr %d/%d:' % (epoch, cfg.end_epoch, itr, trainer.itr_per_epoch),
                'lr: %g' % (trainer.get_lr()),
                'speed: %.2f(%.2fs r%.2f)s/itr' % (
                    trainer.tot_timer.average_time,
                    trainer.gpu_timer.average_time,
                    trainer.read_timer.average_time),
                '%.2fh/epoch' % (trainer.tot_timer.average_time / 3600. * trainer.itr_per_epoch),
                '%s: %.4f' % ('loss_coord', loss_coord.detach()),
            ]
            trainer.logger.info(' '.join(screen))

            trainer.tot_timer.toc()
            trainer.tot_timer.tic()
            trainer.read_timer.tic()

        # Checkpoint model and optimizer state once per epoch.
        trainer.save_model({
            'epoch': epoch,
            'network': trainer.model.state_dict(),
            'optimizer': trainer.optimizer.state_dict(),
        }, epoch)
def main():
    """Script entry point: single-GPU training of a two-stage (proposal + detection) model.

    Builds the trainer (dataloader, model, optimizer, scheduler), optionally
    resumes from a checkpoint, then trains for `cfg.epoch` epochs, logging
    both loss pairs every iteration and stepping the lr scheduler once per
    epoch.
    """
    # Parse command-line options; only one GPU is supported
    # (distributed learning is not implemented).
    args = parse_args()
    cfg.set_args(args.gpu)
    cudnn.benchmark = True

    # Assemble the trainer pieces.
    trainer = Trainer()
    trainer.build_dataloader()
    trainer.build_model()
    trainer.set_optimizer()
    trainer.set_scheduler()

    # Optionally resume training from the saved checkpoint.
    start_epoch = 0
    if cfg.load_checkpoint:
        start_epoch = trainer.load_model()

    logger = Logger()

    for epoch in range(start_epoch, cfg.epoch):
        for i, data in enumerate(trainer.dataloader):
            # BUG FIX: the visualize flag used to be toggled *after* the
            # forward pass, so it took effect one iteration late (i % 500 == 0
            # enabled visualization for iteration i + 1). Set it before the
            # forward pass so the intended iteration is the one visualized.
            if cfg.visualize_switch:
                cfg.visualize = (i % 500 == 0)

            trainer.optimizer.zero_grad()
            proposal_loss, detection_loss = trainer.model(data)

            # Total loss: both classification terms plus the lambda-weighted
            # localization terms of each stage.
            loss = (proposal_loss[0] + proposal_loss[1] * cfg.pro_loc_lambda
                    + detection_loss[0] + detection_loss[1] * cfg.det_loc_lambda)
            loss.backward()
            trainer.optimizer.step()

            logger.log(proposal_loss, detection_loss, epoch, i,
                       epoch * len(trainer.dataloader) + i)

        # Advance the lr schedule once per epoch and checkpoint if configured.
        trainer.scheduler.step()
        if cfg.save_checkpoint:
            trainer.save_model(epoch)
def main():
    """Script entry point: train jointly on several datasets.

    Every iteration pulls one batch from each dataset in `cfg.trainset`,
    concatenates them into a single batch, shuffles it so samples from the
    datasets are interleaved, and runs one optimization step on the combined
    coordinate loss. A checkpoint is written once per epoch.
    """
    # Parse command-line options and push them into the global config.
    args = parse_args()
    cfg.set_args(args.gpu_ids, args.continue_train)
    cudnn.fastest = True
    cudnn.benchmark = True

    # Build the data pipelines (one per dataset) and the model/optimizer state.
    trainer = Trainer()
    trainer._make_batch_generator()
    trainer._make_model()

    for epoch in range(trainer.start_epoch, cfg.end_epoch):
        trainer.set_lr(epoch)
        trainer.tot_timer.tic()
        trainer.read_timer.tic()

        for itr in range(trainer.itr_per_epoch):
            # Draw one batch from every dataset, restarting any iterator that
            # is exhausted (datasets may differ in length).
            img_parts, coord_parts, vis_parts, depth_parts = [], [], [], []
            for ds_idx in range(len(cfg.trainset)):
                try:
                    batch = next(trainer.iterator[ds_idx])
                except StopIteration:
                    trainer.iterator[ds_idx] = iter(trainer.batch_generator[ds_idx])
                    batch = next(trainer.iterator[ds_idx])
                img_parts.append(batch[0])
                coord_parts.append(batch[1])
                vis_parts.append(batch[2])
                depth_parts.append(batch[3])

            # Aggregate items from the different datasets into one batch.
            input_img = torch.cat(img_parts, dim=0)
            joint_img = torch.cat(coord_parts, dim=0)
            joint_vis = torch.cat(vis_parts, dim=0)
            joints_have_depth = torch.cat(depth_parts, dim=0)

            # Shuffle so dataset origins are mixed: build a dataset-interleaving
            # index first, then apply a random permutation on top of it.
            n_sets = len(cfg.trainset)
            n_total = input_img.shape[0]
            rand_idx = torch.cat(
                [torch.arange(ds_idx, n_total, n_sets) for ds_idx in range(n_sets)],
                dim=0)
            rand_idx = rand_idx[torch.randperm(n_total)]
            input_img = input_img[rand_idx]
            joint_img = joint_img[rand_idx]
            joint_vis = joint_vis[rand_idx]
            joints_have_depth = joints_have_depth[rand_idx]

            target = {'coord': joint_img, 'vis': joint_vis, 'have_depth': joints_have_depth}
            trainer.read_timer.toc()
            trainer.gpu_timer.tic()

            # Forward, backward, and parameter update on the mean coord loss.
            trainer.optimizer.zero_grad()
            loss_coord = trainer.model(input_img, target).mean()
            loss_coord.backward()
            trainer.optimizer.step()
            trainer.gpu_timer.toc()

            # One log line: progress, lr, timings, and the coord loss.
            screen = [
                'Epoch %d/%d itr %d/%d:' % (epoch, cfg.end_epoch, itr, trainer.itr_per_epoch),
                'lr: %g' % (trainer.get_lr()),
                'speed: %.2f(%.2fs r%.2f)s/itr' % (
                    trainer.tot_timer.average_time,
                    trainer.gpu_timer.average_time,
                    trainer.read_timer.average_time),
                '%.2fh/epoch' % (trainer.tot_timer.average_time / 3600. * trainer.itr_per_epoch),
                '%s: %.4f' % ('loss_coord', loss_coord.detach()),
            ]
            trainer.logger.info(' '.join(screen))

            trainer.tot_timer.toc()
            trainer.tot_timer.tic()
            trainer.read_timer.tic()

        # Checkpoint model and optimizer state once per epoch.
        trainer.save_model({
            'epoch': epoch,
            'network': trainer.model.state_dict(),
            'optimizer': trainer.optimizer.state_dict(),
        }, epoch)
def main():
    """Script entry point: train a heatmap pose network with an lr scheduler.

    Each iteration moves the batch to the GPU, predicts heatmaps, computes
    the joint-location loss, and runs one optimizer step. A checkpoint that
    also contains the scheduler state is written once per epoch.
    """
    # Parse command-line options and push them into the global config.
    args = parse_args()
    cfg.set_args(args.gpu_ids, args.continue_train)
    cudnn.fastest = True
    cudnn.benchmark = True
    cudnn.deterministic = False
    cudnn.enabled = True

    # Build the data pipeline and the model/optimizer/scheduler state.
    trainer = Trainer(cfg)
    trainer._make_batch_generator()
    trainer._make_model()

    for epoch in range(trainer.start_epoch, cfg.end_epoch):
        # NOTE(review): stepping the scheduler at the *start* of each epoch
        # matches the pre-1.1 PyTorch convention; on PyTorch >= 1.1 this
        # ordering skips the first lr value and triggers a warning — confirm
        # the target framework version before reordering.
        trainer.scheduler.step()
        trainer.tot_timer.tic()
        trainer.read_timer.tic()

        for itr, (input_img, joint_img, joint_vis, joints_have_depth) in enumerate(trainer.batch_generator):
            trainer.read_timer.toc()
            trainer.gpu_timer.tic()
            trainer.optimizer.zero_grad()

            # Move the whole batch onto the GPU.
            input_img = input_img.cuda()
            joint_img = joint_img.cuda()
            joint_vis = joint_vis.cuda()
            joints_have_depth = joints_have_depth.cuda()

            # Forward: predict heatmaps, then score them against the targets.
            heatmap_out = trainer.model(input_img)
            JointLocationLoss = trainer.JointLocationLoss(
                heatmap_out, joint_img, joint_vis, joints_have_depth)

            # Backward and parameter update.
            JointLocationLoss.backward()
            trainer.optimizer.step()
            trainer.gpu_timer.toc()

            # One log line: progress, lr, timings, and the location loss.
            screen = [
                'Epoch %d/%d itr %d/%d:' % (epoch, cfg.end_epoch, itr, trainer.itr_per_epoch),
                'lr: %g' % (trainer.scheduler.get_lr()[0]),
                'speed: %.2f(%.2fs r%.2f)s/itr' % (
                    trainer.tot_timer.average_time,
                    trainer.gpu_timer.average_time,
                    trainer.read_timer.average_time),
                '%.2fh/epoch' % (trainer.tot_timer.average_time / 3600. * trainer.itr_per_epoch),
                '%s: %.4f' % ('loss_loc', JointLocationLoss.detach()),
            ]
            trainer.logger.info(' '.join(screen))

            trainer.tot_timer.toc()
            trainer.tot_timer.tic()
            trainer.read_timer.tic()

        # Checkpoint model, optimizer, and scheduler state once per epoch.
        trainer.save_model({
            'epoch': epoch,
            'network': trainer.model.state_dict(),
            'optimizer': trainer.optimizer.state_dict(),
            'scheduler': trainer.scheduler.state_dict(),
        }, epoch)