def main():
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    use_gpu = torch.cuda.is_available()
    sys.stdout = Logger(osp.join(args.resume, 'log_test.txt'))
    print("==========\nArgs:{}\n==========".format(args))

    print("Initializing dataset {}".format(args.dataset))
    dataset = data_manager.init_dataset(name=args.dataset, root=args.root)

    # Data augmentation
    spatial_transform_test = ST.Compose([
        ST.Scale((args.height, args.width), interpolation=3),
        ST.ToTensor(),
        ST.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    temporal_transform_test = None

    pin_memory = True if use_gpu else False

    queryloader = DataLoader(
        VideoDataset(dataset.query,
                     spatial_transform=spatial_transform_test,
                     temporal_transform=temporal_transform_test),
        batch_size=1, shuffle=False, num_workers=0,
        pin_memory=pin_memory, drop_last=False)

    galleryloader = DataLoader(
        VideoDataset(dataset.gallery,
                     spatial_transform=spatial_transform_test,
                     temporal_transform=temporal_transform_test),
        batch_size=1, shuffle=False, num_workers=0,
        pin_memory=pin_memory, drop_last=False)

    print("Initializing model: {}".format(args.arch))
    model = models.init_model(name=args.arch, num_classes=dataset.num_train_pids)
    print("Model size: {:.5f}M".format(sum(p.numel() for p in model.parameters()) / 1000000.0))

    for epoch in args.test_epochs:
        model_path = osp.join(args.resume, 'checkpoint_ep' + str(epoch) + '.pth.tar')
        print("Loading checkpoint from '{}'".format(model_path))
        checkpoint = torch.load(model_path)
        model.load_state_dict(checkpoint['state_dict'])

        if use_gpu:
            model = model.cuda()

        print("Evaluate")
        with torch.no_grad():
            test(model, queryloader, galleryloader, use_gpu)
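# All of the scripts in this section redirect sys.stdout to a Logger before
# printing. The repository's own Logger implementation is not part of this
# excerpt; the class below is a minimal sketch, assuming it simply tees console
# output into the given log file.
import os
import sys


class Logger(object):
    """Duplicate everything written to stdout into a log file (assumed behaviour)."""

    def __init__(self, fpath=None):
        self.console = sys.stdout
        self.file = None
        if fpath is not None:
            if os.path.dirname(fpath):
                os.makedirs(os.path.dirname(fpath), exist_ok=True)
            self.file = open(fpath, 'w')

    def write(self, msg):
        self.console.write(msg)
        if self.file is not None:
            self.file.write(msg)

    def flush(self):
        self.console.flush()
        if self.file is not None:
            self.file.flush()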
def main():
    sys.stdout = Logger(osp.join(args.save_dir, 'log_test.txt'))
    print("==========\nArgs:{}\n==========".format(args))

    assert os.path.isdir(args.model_load_dir)
    print("Restoring model from {}.".format(args.model_load_dir))
    checkpoint = flow.train.CheckPoint()
    checkpoint.load(args.model_load_dir)

    queryset, galleryset = getDataSets(dataset)

    print("==> Test")
    rank1 = test(queryset, galleryset, dataset)
def main():
    sys.stdout = Logger(osp.join(args.save_dir, 'log_train.txt'))
    print("==========\nArgs:{}\n==========".format(args))

    checkpoint = flow.train.CheckPoint()
    if args.model_load_dir:
        assert os.path.isdir(args.model_load_dir)
        print("Restoring model from {}.".format(args.model_load_dir))
        checkpoint.load(args.model_load_dir)
    else:
        print("Init model")
        checkpoint.init()

    start_epoch = args.start_epoch
    trainset, queryset, galleryset = getDataSets(dataset)

    start_time = time.time()
    train_time = 0
    best_rank1 = -np.inf
    best_epoch = 0
    pre_epoch = -1
    print("==> Start training")

    for epoch in range(start_epoch, args.max_epoch):
        start_train_time = time.time()
        train(epoch, trainset, dataset)
        train_time += round(time.time() - start_train_time)

        if (epoch + 1) >= args.start_eval and args.eval_step > 0 and \
                (epoch + 1) % args.eval_step == 0 or (epoch + 1) == args.max_epoch:
            print("==> Test")
            rank1 = test(queryset, galleryset, dataset)
            is_best = rank1 > best_rank1
            if is_best:
                best_rank1 = rank1
                best_epoch = epoch + 1

            # save checkpoint
            fpath = osp.join(args.save_dir, 'checkpoint_ep' + str(epoch + 1))
            if os.path.exists(fpath):
                shutil.rmtree(fpath)
            if pre_epoch != -1:
                shutil.rmtree(osp.join(args.save_dir, 'checkpoint_ep' + str(pre_epoch)))
            pre_epoch = epoch + 1
            checkpoint.save(fpath)

            # save best
            if is_best:
                best_fpath = osp.join(osp.dirname(fpath), 'best_model')
                if os.path.exists(best_fpath):
                    shutil.rmtree(best_fpath)
                shutil.copytree(fpath, best_fpath)

    print("==> Best Rank-1 {:.1%}, achieved at epoch {}".format(best_rank1, best_epoch))

    elapsed = round(time.time() - start_time)
    elapsed = str(datetime.timedelta(seconds=elapsed))
    train_time = str(datetime.timedelta(seconds=train_time))
    print("Finished. Total elapsed time (h:m:s): {}. Training time (h:m:s): {}.".format(elapsed, train_time))
def train(args, net):
    # Get DataLoader
    data_loader = make_dataloader(args)
    # Get Optimizer
    optimizer = make_optimizer(args, net)
    # Get Criterion
    criterion = Loss(args=args)
    # Get Timer
    timer = Chronometer()
    # Get Logger
    logger = Logger(args=args)
    logger.print_net(net)

    # Check for multi-GPU support
    if torch.cuda.device_count() > 1 and args.mGPU:
        net = torch.nn.DataParallel(net)

    # Create a directory for training files
    if not os.path.exists(args.ckpt):
        os.mkdir(args.ckpt)

    start_epoch = args.start_epoch
    if args.resume:
        checkpoint = torch.load(args.resumed_ckpt)
        start_epoch = checkpoint['epoch']

    best_accuracy = 0.0
    timer.set()
    for epoch in range(start_epoch, args.epochs):
        logger('Epoch: {}'.format(epoch + 1), prt=False)
        epoch_train_loss, is_best = 0.0, False

        with tqdm(total=len(data_loader), ncols=0, file=sys.stdout,
                  desc='Epoch: {}'.format(epoch + 1)) as pbar:
            for i, in_batch in enumerate(data_loader):
                optimizer.zero_grad()
                in_data, target = in_batch

                # Load to GPU
                if torch.cuda.is_available():
                    in_data, target = in_data.cuda(), target.cuda()

                # Forward pass
                predicted = net(in_data)

                # Backward pass
                loss = criterion(predicted, target)
                epoch_train_loss += loss.item()
                loss.backward()
                optimizer.step()

                # Update progress bar
                if i % 50 == 49:
                    logger('[Train loss/batch: {0:.4f}]'.format(loss.item()), prt=False)
                pbar.set_postfix(Loss=loss.item())
                pbar.update()

        epoch_train_loss /= len(data_loader)
        message = 'Average Training Loss : {0:.4f}'.format(epoch_train_loss)
        logger(message)

        # Check performance of the trained model on the test set
        if epoch % args.evaluate_every_n_epoch == args.evaluate_every_n_epoch - 1:
            print('Network Evaluation...')
            net.eval()
            output = evaluate.evaluate(args, net)
            net.train()
            logger(output['message'])

            if output['accuracy'] > best_accuracy:
                best_accuracy = output['accuracy']
                is_best = True  # save the checkpoint as the best checkpoint so far

        save_checkpoint(
            {
                'epoch': epoch + 1,
                'net_state_dict': net.module.state_dict() if args.mGPU else net.state_dict()
            },
            is_best,
            filename=os.path.join(args.ckpt, 'checkpoint.pth.tar'),
            best_filename=os.path.join(args.ckpt, 'best_checkpoint.pth.tar'))

    timer.stop()
    message = 'Finished Training Session in {0} hours & {1} minutes, Best Accuracy Achieved: {2:.2f}\n'.format(
        int(timer.elapsed / 3600), int((timer.elapsed % 3600) / 60), best_accuracy)
    logger(message)
    logger.end()
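# train() above measures wall-clock time through a Chronometer with set()/stop()
# and an `elapsed` attribute in seconds. The real utility is not included in this
# excerpt; the class below is a minimal sketch matching only that usage.
import time


class Chronometer(object):
    """Simple wall-clock timer: set() starts it, stop() records elapsed seconds (assumed helper)."""

    def __init__(self):
        self.start_time = None
        self.elapsed = 0.0

    def set(self):
        self.start_time = time.time()

    def stop(self):
        if self.start_time is not None:
            self.elapsed = time.time() - self.start_time
        return self.elapsed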
                             dtype=torch.float64, device=pred.device)
        f, d, e = torch.svd(weight2)
        loss = self.t_loss_fn(pred, targ) + 0.1 * self.t_loss_fn(Qpose_yuce1, pr_glpose) \
            + 0.01 * self.q_loss_fn(d, listlo)
        return loss


# Config
opt = Options().parse()
cuda = torch.cuda.is_available()
device = "cuda:" + ",".join(str(i) for i in opt.gpus) if cuda else "cpu"

logfile = osp.join(opt.runs_dir, 'log.txt')
stdout = Logger(logfile)
print('Logging to {:s}'.format(logfile))
sys.stdout = stdout

# Model
atloc = PoseExpNet()
model = atloc
train_criterion = AtLocCriterion(saq=opt.beta, learn_beta=True)
val_criterion = AtLocCriterion()

# Optimizer
param_list = [{'params': model.parameters()}]
if hasattr(train_criterion, 'sax') and hasattr(train_criterion, 'saq') and hasattr(model, 'weight2'):
def main(config):
    os.environ['CUDA_VISIBLE_DEVICES'] = config.GPU

    if not config.EVAL_MODE:
        sys.stdout = Logger(osp.join(config.OUTPUT, 'log_train.txt'))
    else:
        sys.stdout = Logger(osp.join(config.OUTPUT, 'log_test.txt'))
    print("==========\nConfig:{}\n==========".format(config))
    print("Currently using GPU {}".format(config.GPU))

    # Set random seed
    set_seed(config.SEED)

    # Build dataloader
    trainloader, queryloader, galleryloader, num_classes = build_dataloader(config)
    # Build model
    model, classifier = build_model(config, num_classes)
    # Build classification and pairwise loss
    criterion_cla, criterion_pair = build_losses(config)

    # Build optimizer
    parameters = list(model.parameters()) + list(classifier.parameters())
    if config.TRAIN.OPTIMIZER.NAME == 'adam':
        optimizer = optim.Adam(parameters,
                               lr=config.TRAIN.OPTIMIZER.LR,
                               weight_decay=config.TRAIN.OPTIMIZER.WEIGHT_DECAY)
    elif config.TRAIN.OPTIMIZER.NAME == 'adamw':
        optimizer = optim.AdamW(parameters,
                                lr=config.TRAIN.OPTIMIZER.LR,
                                weight_decay=config.TRAIN.OPTIMIZER.WEIGHT_DECAY)
    elif config.TRAIN.OPTIMIZER.NAME == 'sgd':
        optimizer = optim.SGD(parameters,
                              lr=config.TRAIN.OPTIMIZER.LR,
                              momentum=0.9,
                              weight_decay=config.TRAIN.OPTIMIZER.WEIGHT_DECAY,
                              nesterov=True)
    else:
        raise KeyError("Unknown optimizer: {}".format(config.TRAIN.OPTIMIZER.NAME))

    # Build lr_scheduler
    scheduler = lr_scheduler.MultiStepLR(optimizer,
                                         milestones=config.TRAIN.LR_SCHEDULER.STEPSIZE,
                                         gamma=config.TRAIN.LR_SCHEDULER.DECAY_RATE)

    start_epoch = config.TRAIN.START_EPOCH
    if config.MODEL.RESUME:
        print("Loading checkpoint from '{}'".format(config.MODEL.RESUME))
        checkpoint = torch.load(config.MODEL.RESUME)
        model.load_state_dict(checkpoint['state_dict'])
        start_epoch = checkpoint['epoch']

    model = nn.DataParallel(model).cuda()
    classifier = nn.DataParallel(classifier).cuda()

    if config.EVAL_MODE:
        print("Evaluate only")
        test(model, queryloader, galleryloader)
        return

    start_time = time.time()
    train_time = 0
    best_rank1 = -np.inf
    best_epoch = 0
    print("==> Start training")

    for epoch in range(start_epoch, config.TRAIN.MAX_EPOCH):
        start_train_time = time.time()
        train(epoch, model, classifier, criterion_cla, criterion_pair, optimizer, trainloader)
        train_time += round(time.time() - start_train_time)

        if (epoch+1) > config.TEST.START_EVAL and config.TEST.EVAL_STEP > 0 and \
           (epoch+1) % config.TEST.EVAL_STEP == 0 or (epoch+1) == config.TRAIN.MAX_EPOCH:
            print("==> Test")
            rank1 = test(model, queryloader, galleryloader)
            is_best = rank1 > best_rank1
            if is_best:
                best_rank1 = rank1
                best_epoch = epoch + 1

            state_dict = model.module.state_dict()
            save_checkpoint({
                'state_dict': state_dict,
                'rank1': rank1,
                'epoch': epoch,
            }, is_best, osp.join(config.OUTPUT, 'checkpoint_ep' + str(epoch + 1) + '.pth.tar'))

        scheduler.step()

    print("==> Best Rank-1 {:.1%}, achieved at epoch {}".format(best_rank1, best_epoch))

    elapsed = round(time.time() - start_time)
    elapsed = str(datetime.timedelta(seconds=elapsed))
    train_time = str(datetime.timedelta(seconds=train_time))
    print("Finished. Total elapsed time (h:m:s): {}. Training time (h:m:s): {}.".format(elapsed, train_time))
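# main(config) above calls set_seed(config.SEED) before building the dataloaders,
# but the helper itself is not shown in this excerpt. A typical implementation
# (assumed here, not the repository's own code) seeds Python, NumPy and PyTorch
# so that runs are repeatable.
import random

import numpy as np
import torch


def set_seed(seed):
    """Seed all relevant RNGs for reproducibility (assumed helper)."""
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)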
def main():
    torch.manual_seed(args.seed)
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    use_gpu = torch.cuda.is_available()
    if args.use_cpu:
        use_gpu = False

    sys.stdout = Logger(osp.join(args.save_dir, 'log_train.txt'))
    print("==========\nArgs:{}\n==========".format(args))

    if use_gpu:
        print("Currently using GPU {}".format(args.gpu))
        torch.cuda.manual_seed_all(args.seed)
    else:
        print("Currently using CPU (GPU is highly recommended)")

    print("Initializing dataset {}".format(args.dataset))
    dataset = data_manager.init_dataset(name=args.dataset, root=args.root)

    # Data augmentation
    spatial_transform_train = ST.Compose([
        ST.Scale((args.height, args.width), interpolation=3),
        ST.RandomHorizontalFlip(),
        ST.ToTensor(),
        ST.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    temporal_transform_train = TT.TemporalRandomCrop(size=args.seq_len, stride=args.sample_stride)

    spatial_transform_test = ST.Compose([
        ST.Scale((args.height, args.width), interpolation=3),
        ST.ToTensor(),
        ST.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    temporal_transform_test = TT.TemporalBeginCrop()

    pin_memory = True if use_gpu else False

    if args.dataset != 'mars':
        trainloader = DataLoader(
            VideoDataset(dataset.train_dense,
                         spatial_transform=spatial_transform_train,
                         temporal_transform=temporal_transform_train),
            sampler=RandomIdentitySampler(dataset.train_dense, num_instances=args.num_instances),
            batch_size=args.train_batch, num_workers=args.workers,
            pin_memory=pin_memory, drop_last=True)
    else:
        trainloader = DataLoader(
            VideoDataset(dataset.train,
                         spatial_transform=spatial_transform_train,
                         temporal_transform=temporal_transform_train),
            sampler=RandomIdentitySampler(dataset.train, num_instances=args.num_instances),
            batch_size=args.train_batch, num_workers=args.workers,
            pin_memory=pin_memory, drop_last=True)

    queryloader = DataLoader(
        VideoDataset(dataset.query,
                     spatial_transform=spatial_transform_test,
                     temporal_transform=temporal_transform_test),
        batch_size=args.test_batch, shuffle=False, num_workers=0,
        pin_memory=pin_memory, drop_last=False)

    galleryloader = DataLoader(
        VideoDataset(dataset.gallery,
                     spatial_transform=spatial_transform_test,
                     temporal_transform=temporal_transform_test),
        batch_size=args.test_batch, shuffle=False, num_workers=0,
        pin_memory=pin_memory, drop_last=False)

    print("Initializing model: {}".format(args.arch))
    model = models.init_model(name=args.arch, num_classes=dataset.num_train_pids)
    print("Model size: {:.5f}M".format(sum(p.numel() for p in model.parameters()) / 1000000.0))

    criterion_xent = nn.CrossEntropyLoss()
    criterion_htri = TripletLoss(margin=args.margin, distance=args.distance)

    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=args.stepsize, gamma=args.gamma)

    start_epoch = args.start_epoch
    if args.resume:
        print("Loading checkpoint from '{}'".format(args.resume))
        checkpoint = torch.load(args.resume)
        model.load_state_dict(checkpoint['state_dict'])
        start_epoch = checkpoint['epoch']

    if use_gpu:
        model = nn.DataParallel(model).cuda()
        # model = model.cuda()

    start_time = time.time()
    train_time = 0
    best_rank1 = -np.inf
    best_epoch = 0
    print("==> Start training")

    for epoch in range(start_epoch, args.max_epoch):
        scheduler.step()

        start_train_time = time.time()
        train(epoch, model, criterion_xent, criterion_htri, optimizer, trainloader, use_gpu)
        train_time += round(time.time() - start_train_time)

        if (epoch + 1) >= args.start_eval and args.eval_step > 0 and \
           (epoch + 1) % args.eval_step == 0 or (epoch + 1) == args.max_epoch:
            print("==> Test")
            with torch.no_grad():
                # test using 4 frames
                rank1 = test(model, queryloader, galleryloader, use_gpu)
            is_best = rank1 > best_rank1
            if is_best:
                best_rank1 = rank1
                best_epoch = epoch + 1

            if use_gpu:
                state_dict = model.module.state_dict()
            else:
                state_dict = model.state_dict()
            save_checkpoint({
                'state_dict': state_dict,
                'rank1': rank1,
                'epoch': epoch,
            }, is_best, osp.join(args.save_dir, 'checkpoint_ep' + str(epoch + 1) + '.pth.tar'))

    print("==> Best Rank-1 {:.1%}, achieved at epoch {}".format(best_rank1, best_epoch))

    elapsed = round(time.time() - start_time)
    elapsed = str(datetime.timedelta(seconds=elapsed))
    train_time = str(datetime.timedelta(seconds=train_time))
    print("Finished. Total elapsed time (h:m:s): {}. Training time (h:m:s): {}.".format(elapsed, train_time))
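# The two training loops above call save_checkpoint(state, is_best, fpath), but
# the utility itself is not included in this excerpt. The sketch below assumes
# the common pattern: write the state dict with torch.save and copy it next to
# itself as 'best_model.pth.tar' when is_best is True. The best-model filename
# is an assumption, not taken from the source.
import os
import shutil

import torch


def save_checkpoint(state, is_best, fpath='checkpoint.pth.tar'):
    """Persist a training state and keep a copy of the best one (assumed helper)."""
    if os.path.dirname(fpath):
        os.makedirs(os.path.dirname(fpath), exist_ok=True)
    torch.save(state, fpath)
    if is_best:
        shutil.copy(fpath, os.path.join(os.path.dirname(fpath), 'best_model.pth.tar'))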