def train(DATA_PATH, MATRIX_PATH, MODEL_PATH, data_config, model_config):
    """Build features, train the baseline model, and persist weights/history.

    Args:
        DATA_PATH: location of the raw input data.
        MATRIX_PATH: location of the (re)generated feature matrix.
        MODEL_PATH: directory prefix where weights/history are written
            (assumed to end with a path separator).
        data_config: windowing/batching configuration object.
        model_config: model configuration (epochs, model_name).

    Returns:
        Tuple of (features, trained model).
    """
    feats = du.create_features_optimized(DATA_PATH, MATRIX_PATH, generate_matrix=True)
    windows = du.generate_windows(feats, data_config)
    # Number of training samples, taken from the first window tensor.
    n_samples = windows[0][0].shape[0]
    batch_gen = du.generator(windows, data_config.window_train,
                             data_config.window_pred, data_config.batch_size)
    print('*' * 50, 'Training Model', '*' * 50)
    net = models.get_baseline_model(data_config)
    # NOTE(review): the * 11 multiplier looks like an augmentation factor —
    # confirm it matches what du.generator yields per sample.
    n_steps = (n_samples // data_config.batch_size) * 11
    history = net.fit_generator(batch_gen,
                                epochs=model_config.epochs,
                                steps_per_epoch=n_steps)
    net.save_weights(MODEL_PATH + f'weight_{model_config.model_name}.h5')
    with open(MODEL_PATH + f'history_{model_config.model_name}.pkl', 'wb') as fh:
        pickle.dump(history.history, fh)
    return feats, net
def test(**kwargs):
    """Evaluate a trained ReID model (MXNet) on query/gallery sets.

    kwargs override fields of the global `opt` config; evaluation output
    is tee'd to log_test.txt in the save directory.
    """
    opt._parse(kwargs)
    sys.stdout = Logger(osp.join(opt.save_dir, 'log_test.txt'))
    ctx = mx.gpu(0)

    print('initializing dataset {}'.format(opt.dataset))
    dataset = data_manager.init_dataset(name=opt.dataset)

    # Both eval loaders share the same transform and batching settings.
    def _eval_loader(split):
        return DataLoader(
            ImageData(split, TestTransform(opt.height, opt.width)),
            batch_size=opt.test_batch,
            num_workers=opt.workers,
        )

    queryloader = _eval_loader(dataset.query)
    galleryloader = _eval_loader(dataset.gallery)

    print('loading model ...')
    model = get_baseline_model(dataset.num_train_pids, ctx)
    model.load_parameters(opt.load_model, ctx)
    n_params = sum(p.data().size for p in model.collect_params().values())
    print('model size: {:.5f}M'.format(n_params / 1e6))

    reid_evaluator = reidEvaluator(model, ctx)
    reid_evaluator.evaluate(queryloader, galleryloader)
def __init__(
        self,
        model_path_='/unsullied/sharefs/zhongyunshan/isilon-home/model-parameters/DeepFashion-correct-background/model_best.pth.tar',
        size=(256, 128),
        gpu_ids=None,
        is_trainable=False,
        w=None):
    """Perceptual ReID loss backed by a frozen (by default) baseline model.

    Args:
        model_path_: checkpoint to restore the 5837-class baseline from.
        size: (H, W) the loss operates on.
        gpu_ids: if not None, model and normalization tensors go to CUDA.
        is_trainable: whether gradients flow through the backbone.
        w: per-term loss weights; defaults to [1, 1, 1, 1].
    """
    super(ReIDLoss, self).__init__()
    self.size = size
    self.gpu_ids = gpu_ids
    # BUG FIX: `w=[1, 1, 1, 1]` was a mutable default argument shared by
    # every instance; use a None sentinel with the same effective default.
    self.w = [1, 1, 1, 1] if w is None else w
    model, optim_policy = get_baseline_model(num_classes=5837,
                                             eval_norm=0,
                                             model_path=None)
    param_dict = torch.load(model_path_)
    model.load_state_dict(param_dict['state_dict'])
    if 'best_rank1' in param_dict.keys():
        print('load!')
        best_rank1 = param_dict['best_rank1']
        best_epoch = param_dict['best_epoch']
        print("best rank1 = {} at best epoch = {}".format(
            best_rank1, best_epoch))
    self.model = model
    if gpu_ids is not None:
        self.model.cuda()
    for n, m in self.model.base.named_children():
        print(n)
    self.is_trainable = is_trainable
    # Freeze (or unfreeze) the whole backbone in one pass.
    for param in self.model.parameters():
        param.requires_grad = self.is_trainable
    self.MSELoss = nn.MSELoss()
    self.triple_feature_loss = nn.L1Loss()
    # ImageNet mean/std, broadcast to the (H, W) grid.
    self.normalize_mean = torch.Tensor([0.485, 0.456, 0.406])
    self.normalize_mean = self.normalize_mean.expand(256, 128, 3).permute(
        2, 0, 1)  # move channels first (HWC -> CHW)
    self.normalize_std = torch.Tensor([0.229, 0.224, 0.225])
    self.normalize_std = self.normalize_std.expand(256, 128, 3).permute(2, 0,
                                                                        1)  # move channels first (HWC -> CHW)
    if gpu_ids is not None:
        self.normalize_std = self.normalize_std.cuda()
        self.normalize_mean = self.normalize_mean.cuda()
def load_model(model_path=None, num_of_classes=0):
    """Construct the baseline model, optionally restoring a checkpoint.

    Only checkpoint entries whose names exist in the freshly built model
    are loaded (strict=False tolerates the remainder). The model is moved
    to CUDA before being returned.

    Returns:
        (model on GPU, optimizer parameter policy)
    """
    model, optim_policy = get_baseline_model(model_path=None,
                                             num_of_classes=num_of_classes)
    if model_path is not None:
        own_state = model.state_dict()
        checkpoint = torch.load(model_path)
        # Keep only parameters the current architecture actually has.
        filtered = {}
        for name, tensor in checkpoint['state_dict'].items():
            if name in own_state.keys():
                filtered[name] = tensor
        model.load_state_dict(filtered, strict=False)
        print('model', model_path.split('/')[-1], 'loaded.')
    model = model.cuda()
    return model, optim_policy
def test(**kwargs):
    """Evaluate the ResNet baseline on the configured query/gallery split.

    kwargs override fields of the global `opt` config. Output is tee'd to
    a log file in opt.save_dir.
    """
    opt._parse(kwargs)
    # set random seed and cudnn benchmark
    torch.manual_seed(opt.seed)
    use_gpu = torch.cuda.is_available()
    # BUG FIX: this test entry point previously logged to 'log_train.txt',
    # clobbering the training log; use the test log name (as the other
    # test() in this project does).
    sys.stdout = Logger(osp.join(opt.save_dir, 'log_test.txt'))
    if use_gpu:
        print('currently using GPU {}'.format(opt.gpu))
        cudnn.benchmark = True
        torch.cuda.manual_seed_all(opt.seed)
        # NOTE(review): setting CUDA_VISIBLE_DEVICES after CUDA has already
        # been queried may have no effect — confirm device selection.
        os.environ["CUDA_VISIBLE_DEVICES"] = opt.gpu
    else:
        print('currently using cpu')
    print('initializing dataset {}'.format(opt.dataset))
    dataset = data_manager.init_dataset(name=opt.dataset)
    pin_memory = True if use_gpu else False
    queryloader = DataLoader(ImageData(dataset.query,
                                       TestTransform(opt.height, opt.width)),
                             batch_size=opt.test_batch,
                             num_workers=opt.workers,
                             pin_memory=pin_memory)
    galleryloader = DataLoader(ImageData(dataset.gallery,
                                         TestTransform(opt.height, opt.width)),
                               batch_size=opt.test_batch,
                               num_workers=opt.workers,
                               pin_memory=pin_memory)
    print('loading model ...')
    model, optim_policy = get_baseline_model(dataset.num_train_pids)
    # ckpt = torch.load(opt.load_model)
    # model.load_state_dict(ckpt['state_dict'])
    # NOTE(review): checkpoint loading is commented out above, so this
    # evaluates whatever weights get_baseline_model returns — confirm.
    print('model size: {:.5f}M'.format(
        sum(p.numel() for p in model.parameters()) / 1e6))
    if use_gpu:
        model = nn.DataParallel(model).cuda()
    reid_evaluator = ResNetEvaluator(model)
    reid_evaluator.evaluate(queryloader, galleryloader)
def train(**kwargs):
    """Train the ReID baseline (softmax / triplet / softmax+triplet).

    kwargs override fields of the global `opt` config. Side effects:
    redirects stdout to a log file, writes TensorBoard summaries and
    checkpoints under opt.save_dir. Model choice, sampler, criterion and
    trainer class are all selected from opt.model_name.
    """
    opt._parse(kwargs)
    # set random seed and cudnn benchmark
    torch.manual_seed(opt.seed)
    use_gpu = torch.cuda.is_available()
    sys.stdout = Logger(osp.join(opt.save_dir, 'log_train.txt'))
    print('=========user config==========')
    pprint(opt._state_dict())
    print('============end===============')
    if use_gpu:
        print('currently using GPU')
        cudnn.benchmark = True
        torch.cuda.manual_seed_all(opt.seed)
    else:
        print('currently using cpu')
    print('initializing dataset {}'.format(opt.dataset))
    dataset = data_manager.init_dataset(name=opt.dataset)
    pin_memory = True if use_gpu else False
    summary_writer = SummaryWriter(osp.join(opt.save_dir, 'tensorboard_log'))
    # Triplet-style losses need P×K identity batches; plain softmax shuffles.
    if 'triplet' in opt.model_name:
        trainloader = DataLoader(
            ImageData(dataset.train, TrainTransform(opt.height, opt.width)),
            sampler=RandomIdentitySampler(dataset.train, opt.num_instances),
            batch_size=opt.train_batch,
            num_workers=opt.workers,
            pin_memory=pin_memory,
            drop_last=True)
    else:
        trainloader = DataLoader(
            ImageData(dataset.train, TrainTransform(opt.height, opt.width)),
            batch_size=opt.train_batch,
            shuffle=True,
            num_workers=opt.workers,
            pin_memory=pin_memory)
    queryloader = DataLoader(
        ImageData(dataset.query, TestTransform(opt.height, opt.width)),
        batch_size=opt.test_batch,
        num_workers=opt.workers,
        pin_memory=pin_memory)
    galleryloader = DataLoader(
        ImageData(dataset.gallery, TestTransform(opt.height, opt.width)),
        batch_size=opt.test_batch,
        num_workers=opt.workers,
        pin_memory=pin_memory)
    print('initializing model ...')
    # Softmax variants need a classifier head sized to the identity count.
    if opt.model_name == 'softmax' or opt.model_name == 'softmax_triplet':
        model, optim_policy = get_baseline_model(dataset.num_train_pids)
    elif opt.model_name == 'triplet':
        model, optim_policy = get_baseline_model(num_classes=None)
    # NOTE(review): an unrecognized opt.model_name falls through and raises
    # NameError on `model` below — confirm inputs are pre-validated.
    print('model size: {:.5f}M'.format(
        sum(p.numel() for p in model.parameters()) / 1e6))
    # xent_criterion = nn.CrossEntropyLoss()
    xent_criterion = CrossEntropyLabelSmooth(dataset.num_train_pids)
    tri_criterion = TripletLoss(opt.margin)

    def cls_criterion(cls_scores, targets):
        # classification-only loss
        cls_loss = xent_criterion(cls_scores, targets)
        return cls_loss

    def triplet_criterion(feat, targets):
        # metric-learning-only loss (distances discarded)
        triplet_loss, _, _ = tri_criterion(feat, targets)
        return triplet_loss

    def cls_tri_criterion(cls_scores, feat, targets):
        # joint loss: unweighted sum of the two terms
        cls_loss = xent_criterion(cls_scores, targets)
        triplet_loss, _, _ = tri_criterion(feat, targets)
        loss = cls_loss + triplet_loss
        return loss

    # get optimizer
    optimizer = torch.optim.Adam(optim_policy,
                                 lr=opt.lr,
                                 weight_decay=opt.weight_decay)

    def adjust_lr(optimizer, ep):
        # Hand-written warmup + step schedule, keyed on 1-based epoch.
        if ep < 20:
            lr = 1e-4 * (ep + 1) / 2
        elif ep < 80:
            lr = 1e-3 * opt.num_gpu
        elif ep < 180:
            lr = 1e-4 * opt.num_gpu
        elif ep < 300:
            lr = 1e-5 * opt.num_gpu
        elif ep < 320:
            # NOTE(review): here ep - 320 is negative, so 0.1**(neg) > 1 and
            # this *raises* lr above 1e-5 for epochs 300-319 — confirm intent.
            lr = 1e-5 * 0.1**((ep - 320) / 80) * opt.num_gpu
        elif ep < 400:
            lr = 1e-6
        elif ep < 480:
            lr = 1e-4 * opt.num_gpu
        else:
            lr = 1e-5 * opt.num_gpu
        for p in optimizer.param_groups:
            p['lr'] = lr

    start_epoch = opt.start_epoch
    if use_gpu:
        model = nn.DataParallel(model).cuda()
    # get trainer and evaluator
    if opt.model_name == 'softmax':
        reid_trainer = clsTrainer(opt, model, optimizer, cls_criterion,
                                  summary_writer)
    elif opt.model_name == 'softmax_triplet':
        reid_trainer = cls_tripletTrainer(opt, model, optimizer,
                                          cls_tri_criterion, summary_writer)
    elif opt.model_name == 'triplet':
        reid_trainer = tripletTrainer(opt, model, optimizer,
                                      triplet_criterion, summary_writer)
    reid_evaluator = ResNetEvaluator(model)
    # start training
    best_rank1 = -np.inf
    best_epoch = 0
    for epoch in range(start_epoch, opt.max_epoch):
        if opt.step_size > 0:
            adjust_lr(optimizer, epoch + 1)
        reid_trainer.train(epoch, trainloader)
        # skip if not save model
        # Evaluate/checkpoint every eval_step epochs and always on the last.
        if opt.eval_step > 0 and (epoch + 1) % opt.eval_step == 0 or (
                epoch + 1) == opt.max_epoch:
            rank1 = reid_evaluator.evaluate(queryloader, galleryloader)
            is_best = rank1 > best_rank1
            if is_best:
                best_rank1 = rank1
                best_epoch = epoch + 1
            # Unwrap DataParallel before saving so keys have no 'module.' prefix.
            if use_gpu:
                state_dict = model.module.state_dict()
            else:
                state_dict = model.state_dict()
            save_checkpoint({
                'state_dict': state_dict,
                'epoch': epoch + 1,
            },
                            is_best=is_best,
                            save_dir=opt.save_dir,
                            filename='checkpoint_ep' + str(epoch + 1) +
                            '.pth.tar')
    print('Best rank-1 {:.1%}, achived at epoch {}'.format(
        best_rank1, best_epoch))
def trainer(data_pth, a, b, _time=0, layers=18):
    """Train a ResNet-18/50 triplet model on data at `data_pth`.

    Args:
        data_pth: dataset root passed through to train().
        a: triplet margin used for training.
        b: test margin (stored but unused here — see NOTE below).
        _time: run index; only affects the checkpoint directory name.
        layers: 18 selects the ResNet-18 backbone, anything else ResNet-50.

    Returns:
        Filename of the last checkpoint written.
    """
    seed = 0
    # dataset options
    height, width = 128, 128
    # optimization options
    # NOTE(review): several of these (optim, gamma, momentum, train_batch,
    # test_batch, test_margin, num_instances, last_stride) are assigned but
    # never used in this function — kept for parity with sibling trainers.
    optim = 'Adam'
    max_epoch = 20
    train_batch = 64
    test_batch = 64
    lr = 0.1
    step_size = 40
    gamma = 0.1
    weight_decay = 5e-4
    momentum = 0.9
    test_margin = b
    margin = a
    num_instances = 4
    num_gpu = 1
    # model options
    last_stride = 1
    pretrained_model_18 = 'model/resnet18-5c106cde.pth'
    pretrained_model_50 = 'model/resnet50-19c8e357.pth'
    pretrained_model_34 = 'model/resnet34-333f7ec4.pth'
    pretrained_model_101 = 'model/resnet101-5d3b4d8f.pth'
    pretrained_model_152 = 'model/resnet152-b121ed2d.pth'
    # miscs
    print_freq = 10
    eval_step = 1
    save_dir = 'model/pytorch-ckpt/time%d' % _time
    workers = 1
    torch.manual_seed(seed)
    use_gpu = torch.cuda.is_available()
    if use_gpu:
        print('currently using GPU')
        cudnn.benchmark = True
        torch.cuda.manual_seed_all(seed)
    else:
        print('currently using cpu')
    pin_memory = True if use_gpu else False
    # model, optim_policy = get_baseline_model(model_path=pretrained_model)
    # Only 18 vs 50 are selectable; other depths remain commented out below.
    if layers == 18:
        model, optim_policy = get_baseline_model(
            model_path=pretrained_model_18, layers=18)
    else:
        model, optim_policy = get_baseline_model(
            model_path=pretrained_model_50, layers=50)
    # model, optim_policy = get_baseline_model(model_path=pretrained_model_18, layers=18)
    # model, optim_policy = get_baseline_model(model_path=pretrained_model_34, layers=34)
    # model, optim_policy = get_baseline_model(model_path=pretrained_model_101, layers=101)
    # model = load_model(model, model_path='./model/pytorch-ckpt/87_layers18_margin20_epoch87.tar')
    print('model\'s parameters size: {:.5f} M'.format(
        sum(p.numel() for p in model.parameters()) / 1e6))
    tri_criterion = TripletLoss(margin)
    # get optimizer
    optimizer = torch.optim.Adam(optim_policy,
                                 lr=lr,
                                 weight_decay=weight_decay)

    def adjust_lr(optimizer, ep):
        # Hand-written warmup + step schedule, keyed on 1-based epoch.
        if ep < 20:
            lr = 1e-4 * (ep + 1) / 2
        elif ep < 80:
            lr = 1e-3 * num_gpu
        elif ep < 180:
            lr = 1e-4 * num_gpu
        elif ep < 300:
            lr = 1e-5 * num_gpu
        elif ep < 320:
            # NOTE(review): ep - 320 is negative here, so 0.1**(neg) > 1 and
            # this raises lr above 1e-5 for epochs 300-319 — confirm intent.
            lr = 1e-5 * 0.1**((ep - 320) / 80) * num_gpu
        elif ep < 400:
            lr = 1e-6
        elif ep < 480:
            lr = 1e-4 * num_gpu
        else:
            lr = 1e-5 * num_gpu
        for p in optimizer.param_groups:
            p['lr'] = lr

    if use_gpu:
        model = nn.DataParallel(model).cuda()
    max_acc = .0
    for epoch in range(max_epoch):
        if step_size > 0:
            adjust_lr(optimizer, epoch + 1)
        next_margin = margin
        # skip if not save model
        # NOTE(review): with eval_step=1 this guard is true every epoch, but
        # train() itself lives inside it — with a larger eval_step, training
        # would be skipped on non-eval epochs. Confirm that is intended.
        if eval_step > 0 and (epoch + 1) % eval_step == 0 or (epoch +
                                                              1) == max_epoch:
            _t1 = time.time()
            train(model,
                  optimizer,
                  tri_criterion,
                  epoch + 1,
                  print_freq,
                  None,
                  data_pth=data_pth)
            _t2 = time.time()
            print('time for training:', '%.2f' % (_t2 - _t1), 's')
            acc = evaluate_model(model, margin=20, epoch=1)
            if acc > max_acc:
                max_acc = acc
            print('max acc:', max_acc, ', epoch:', epoch + 1)
            # Unwrap DataParallel before saving so keys have no 'module.' prefix.
            if use_gpu:
                state_dict = model.module.state_dict()
            else:
                state_dict = model.state_dict()
            save_model_name = 'layers{}_margin{}_epoch{}.tar'.format(
                layers, margin, epoch + 1)
            save_checkpoint(
                {
                    'state_dict': state_dict,
                    'epoch': epoch + 1,
                },
                is_best=False,
                save_dir=save_dir,
                filename=save_model_name)
        margin = next_margin
    return save_model_name
def __init__(
        self,
        model_path_='/unsullied/sharefs/zhongyunshan/isilon-home/model-parameters/Reid_Baseline/model_best92.3.pth.tar',
        size=(256, 128),
        gpu_ids=None,
        is_trainable=False,
        w=None):
    """Perceptual ReID loss backed by a frozen (by default) baseline model.

    Args:
        model_path_: checkpoint to restore the 751-class baseline from.
        size: (H, W) the loss operates on.
        gpu_ids: if not None, model and normalization tensors go to CUDA.
        is_trainable: whether gradients flow through the backbone.
        w: per-term loss weights; defaults to [1, 1, 1, 1].
    """
    super(ReIDLoss, self).__init__()
    self.size = size
    self.gpu_ids = gpu_ids
    # BUG FIX: `w=[1, 1, 1, 1]` was a mutable default argument shared by
    # every instance; use a None sentinel with the same effective default.
    self.w = [1, 1, 1, 1] if w is None else w
    model, optim_policy = get_baseline_model(num_classes=751,
                                             eval_norm=0,
                                             model_path=None)
    param_dict = torch.load(model_path_)
    model.load_state_dict(param_dict['state_dict'])
    if 'best_rank1' in param_dict.keys():
        best_rank1 = param_dict['best_rank1']
        best_epoch = param_dict['best_epoch']
        print("best rank1 = {} at best epoch = {}".format(
            best_rank1, best_epoch))
    # (A disabled torchvision-resnet50 loading variant was removed here.)
    self.model = model
    if gpu_ids is not None:
        self.model.cuda()
    for n, m in self.model.base.named_children():
        print(n)
    self.is_trainable = is_trainable
    # Freeze (or unfreeze) the whole backbone in one pass.
    for param in self.model.parameters():
        param.requires_grad = self.is_trainable
    self.MSELoss = nn.MSELoss()
    self.triple_feature_loss = nn.L1Loss()
    # ImageNet mean/std, broadcast to the (H, W) grid.
    self.normalize_mean = torch.Tensor([0.485, 0.456, 0.406])
    self.normalize_mean = self.normalize_mean.expand(256, 128, 3).permute(
        2, 0, 1)  # move channels first (HWC -> CHW)
    self.normalize_std = torch.Tensor([0.229, 0.224, 0.225])
    self.normalize_std = self.normalize_std.expand(256, 128, 3).permute(2, 0,
                                                                        1)  # move channels first (HWC -> CHW)
    if gpu_ids is not None:
        self.normalize_std = self.normalize_std.cuda()
        self.normalize_mean = self.normalize_mean.cuda()
def trainer(data_pth, a, b, _time=0, layers=18):
    """Train a ResNet-18/50 triplet model on the Tableware dataset.

    Args:
        data_pth: dataset root handed to Tableware and train().
        a: triplet margin used for training.
        b: test margin (stored but unused — evaluation below is disabled).
        _time: run index; appears in save_dir and checkpoint filenames.
        layers: 18 selects the ResNet-18 backbone, anything else ResNet-50.

    Returns:
        (last checkpoint filename, inner_dist, outer_dist, max_outer,
         min_outer, max_iner, min_iner) — the distance stats are only
        populated by the disabled evaluation block and stay 0 here.
    """
    seed = 0
    # dataset options
    height = 128
    width = 128
    # optimization options
    # NOTE(review): optim, gamma, momentum, test_batch, test_margin,
    # num_instances and last_stride are assigned but unused — kept for
    # parity with sibling trainers.
    optim = 'Adam'
    max_epoch = 20
    train_batch = 64
    test_batch = 64
    lr = 0.1
    step_size = 40
    gamma = 0.1
    weight_decay = 5e-4
    momentum = 0.9
    test_margin = b
    margin = a
    num_instances = 4
    num_gpu = 1
    # model options
    last_stride = 1
    pretrained_model_18 = 'model/resnet18-5c106cde.pth'
    pretrained_model_50 = 'model/resnet50-19c8e357.pth'
    pretrained_model_34 = 'model/resnet34-333f7ec4.pth'
    pretrained_model_101 = 'model/resnet101-5d3b4d8f.pth'
    pretrained_model_152 = 'model/resnet152-b121ed2d.pth'
    # miscs
    print_freq = 20
    eval_step = 1
    save_dir = 'model/pytorch-ckpt/time%d' % _time
    workers = 1
    start_epoch = 0
    torch.manual_seed(seed)
    use_gpu = torch.cuda.is_available()
    if use_gpu:
        print('currently using GPU')
        cudnn.benchmark = True
        torch.cuda.manual_seed_all(seed)
    else:
        print('currently using cpu')
    pin_memory = True if use_gpu else False
    print('initializing dataset {}'.format('Tableware'))
    dataset = Tableware(data_pth)
    trainloader = DataLoader(
        ImageData(dataset.train, TrainTransform(height, width)),
        batch_size=train_batch, num_workers=workers,
        pin_memory=pin_memory, drop_last=True
    )
    # testloader = DataLoader(
    #     ImageData(dataset.test, TestTransform(height, width)),
    #     batch_size=test_batch, num_workers=workers,
    #     pin_memory=pin_memory, drop_last=True
    # )
    # model, optim_policy = get_baseline_model(model_path=pretrained_model)
    # Only 18 vs 50 are selectable; other depths remain commented out below.
    if layers == 18:
        model, optim_policy = get_baseline_model(model_path=pretrained_model_18, layers=18)
    else:
        model, optim_policy = get_baseline_model(model_path=pretrained_model_50, layers=50)
    # model, optim_policy = get_baseline_model(model_path=pretrained_model_18, layers=18)
    # model, optim_policy = get_baseline_model(model_path=pretrained_model_34, layers=34)
    # model, optim_policy = get_baseline_model(model_path=pretrained_model_101, layers=101)
    # model = load_model(model, model_path='./model/pytorch-ckpt/87_layers18_margin20_epoch87.tar')
    print('model\'s parameters size: {:.5f} M'.format(sum(p.numel() for p in model.parameters()) / 1e6))
    # Distance stats; only the disabled evaluate() call below fills these.
    inner_dist = 0
    outer_dist = 0
    max_outer = 0
    min_outer = 0
    max_iner = 0
    min_iner = 0
    tri_criterion = TripletLoss(margin)
    # get optimizer
    optimizer = torch.optim.Adam(
        optim_policy, lr=lr, weight_decay=weight_decay
    )

    def adjust_lr(optimizer, ep):
        # Hand-written warmup + step schedule, keyed on 1-based epoch.
        if ep < 20:
            lr = 1e-4 * (ep + 1) / 2
        elif ep < 80:
            lr = 1e-3 * num_gpu
        elif ep < 180:
            lr = 1e-4 * num_gpu
        elif ep < 300:
            lr = 1e-5 * num_gpu
        elif ep < 320:
            # NOTE(review): ep - 320 is negative here, so 0.1**(neg) > 1 and
            # this raises lr above 1e-5 for epochs 300-319 — confirm intent.
            lr = 1e-5 * 0.1 ** ((ep - 320) / 80) * num_gpu
        elif ep < 400:
            lr = 1e-6
        elif ep < 480:
            lr = 1e-4 * num_gpu
        else:
            lr = 1e-5 * num_gpu
        for p in optimizer.param_groups:
            p['lr'] = lr

    if use_gpu:
        model = nn.DataParallel(model).cuda()
    evaluator = Evaluator(model)
    for epoch in range(start_epoch, max_epoch):
        if step_size > 0:
            adjust_lr(optimizer, epoch + 1)
        next_margin = margin
        # skip if not save model
        # NOTE(review): with eval_step=1 this guard is true every epoch, but
        # train() itself lives inside it — a larger eval_step would skip
        # training on non-eval epochs. Confirm that is intended.
        if eval_step > 0 and (epoch + 1) % eval_step == 0 or (epoch + 1) == max_epoch:
            save_record_path = 'margin_'+ str(margin) + '_epoch_' + str(epoch + 1) + '.txt'
            _t1 =time.time()
            train(model, optimizer, tri_criterion, epoch, print_freq, trainloader, data_pth=data_pth)
            _t2 = time.time()
            print('time for training:', '%.2f' % (_t2 - _t1), 's')
            """
            acc, inner_dist, outer_dist, max_outer, min_outer, max_iner, min_iner = evaluator.evaluate(testloader, test_margin, save_record_path)
            print('margin:{}, epoch:{}, acc:{}'.format(margin, epoch+1, acc))
            f = open('record.txt', 'a')
            f.write('margin:{}, epoch:{}, acc:{}\n'.format(margin, epoch+1, acc))
            f.close()
            """
            is_best = False
            # save_model_path = 'new_margin({})_epoch({}).pth.tar'.format(margin, epoch+1)
            save_model_path = 'time{}_layers{}_margin{}_epoch{}.tar'.format(_time, layers, margin, epoch+1)
            # save_model_path = 'layers34_margin{}_epoch{}.tar'.format(margin, epoch+1)
            # save_model_path = 'layers101_margin{}_epoch{}.tar'.format(margin, epoch+1)
            # Unwrap DataParallel before saving so keys have no 'module.' prefix.
            if use_gpu:
                state_dict = model.module.state_dict()
            else:
                state_dict = model.state_dict()
            save_checkpoint({
                'state_dict': state_dict,
                'epoch': epoch + 1,
            }, is_best=is_best, save_dir=save_dir, filename=save_model_path)
            # NOTE(review): model is switched to eval() here and never set
            # back to train() before the next epoch — confirm train() does it.
            model.eval()
            acc = do_get_feature_and_t(model, margin=20, epoch=1)
        margin = next_margin
    return save_model_path, inner_dist, outer_dist, max_outer, min_outer, max_iner, min_iner
@tf.function
def evaluate(
    model,
    eval_ds,
    metric_fn,
):
    """Reset `metric_fn`, then accumulate it over every batch of `eval_ds`."""
    metric_fn.reset_states()
    for images, labels in eval_ds:
        predictions = model(images)
        metric_fn(labels, predictions)


if __name__ == "__main__":
    # Build datasets, model and optimizer, then run UDA training.
    train_sup_ds, train_unsup_ds, test_sup_ds = get_data()
    baseline_model = models.get_baseline_model(width=32, height=32, n_channel=3)
    opt = tf.optimizers.Adam()
    # writer = tf.summary.create_file_writer(logdir="logs/func/temp")
    train_with_uda(
        train_sup_dataset=train_sup_ds,
        train_unsup_dataset=train_unsup_ds,
        model=baseline_model,
        optimizer=opt,
        n_step=tf.constant(100000, name="n_step"),
    )
    # (A plain supervised train(...) invocation used to live here.)
def train(**kwargs):
    """Train the ReID baseline with MXNet/Gluon.

    kwargs override fields of the global `opt` config. Side effects:
    redirects stdout to a log file, writes TensorBoard summaries and
    .params checkpoints under opt.save_dir. Criterion choice follows
    opt.model_name; everything runs on mx.gpu(0).
    """
    opt._parse(kwargs)
    # set random seed and cudnn benchmark
    sys.stdout = Logger(osp.join(opt.save_dir, 'log_train.txt'))
    print('=========user config==========')
    pprint(opt._state_dict())
    print('============end===============')
    print('initializing dataset {}'.format(opt.dataset))
    dataset = data_manager.init_dataset(name=opt.dataset)
    summary_writer = SummaryWriter(osp.join(opt.save_dir, 'tensorboard_log'))
    # Triplet-style losses need P×K identity batches; plain softmax shuffles.
    if 'triplet' in opt.model_name:
        trainloader = DataLoader(
            ImageData(dataset.train, TrainTransform(opt.height, opt.width)),
            sampler=RandomIdentitySampler(dataset.train, opt.num_instances),
            batch_size=opt.train_batch,
            num_workers=opt.workers,
            last_batch='discard')
    else:
        trainloader = DataLoader(
            ImageData(dataset.train, TrainTransform(opt.height, opt.width)),
            batch_size=opt.train_batch,
            shuffle=True,
            num_workers=opt.workers,
        )
    queryloader = DataLoader(
        ImageData(dataset.query, TestTransform(opt.height, opt.width)),
        batch_size=opt.test_batch,
        num_workers=opt.workers,
    )
    galleryloader = DataLoader(
        ImageData(dataset.gallery, TestTransform(opt.height, opt.width)),
        batch_size=opt.test_batch,
        num_workers=opt.workers,
    )
    print('initializing model ...')
    model = get_baseline_model(dataset.num_train_pids, mx.gpu(0),
                               opt.pretrained_model)
    print('model size: {:.5f}M'.format(
        sum(p.data().size for p in model.collect_params().values()) / 1e6))
    xent_criterion = gluon.loss.SoftmaxCrossEntropyLoss()
    tri_criterion = TripletLoss(opt.margin)

    # All criteria share one signature so the trainer can call any of them.
    def cls_criterion(cls_scores, feat, targets):
        cls_loss = xent_criterion(cls_scores, targets)
        return cls_loss

    def triplet_criterion(cls_scores, feat, targets):
        triplet_loss, dist_ap, dist_an = tri_criterion(feat, targets)
        return triplet_loss

    def cls_tri_criterion(cls_scores, feat, targets):
        # joint loss: unweighted sum of the two terms
        cls_loss = xent_criterion(cls_scores, targets)
        triplet_loss, dist_ap, dist_an = tri_criterion(feat, targets)
        loss = cls_loss + triplet_loss
        return loss

    # get optimizer
    optimizer = gluon.Trainer(model.collect_params(), opt.optim, {
        'learning_rate': opt.lr,
        'wd': opt.weight_decay
    })

    def adjust_lr(optimizer, ep):
        # Hand-written warmup + step schedule, keyed on 1-based epoch.
        if ep < 20:
            lr = 1e-4 * (ep + 1) / 2
        elif ep < 80:
            lr = 1e-3 * opt.num_gpu
        elif ep < 180:
            lr = 1e-4 * opt.num_gpu
        elif ep < 300:
            lr = 1e-5 * opt.num_gpu
        elif ep < 320:
            # NOTE(review): ep - 320 is negative here, so 0.1**(neg) > 1 and
            # this raises lr above 1e-5 for epochs 300-319 — confirm intent.
            lr = 1e-5 * 0.1**((ep - 320) / 80) * opt.num_gpu
        elif ep < 400:
            lr = 1e-6
        elif ep < 480:
            lr = 1e-4 * opt.num_gpu
        else:
            lr = 1e-5 * opt.num_gpu
        optimizer.set_learning_rate(lr)

    start_epoch = opt.start_epoch
    # get trainer and evaluator
    use_criterion = None
    if opt.model_name == 'softmax':
        use_criterion = cls_criterion
    elif opt.model_name == 'softmax_triplet':
        use_criterion = cls_tri_criterion
    elif opt.model_name == 'triplet':
        use_criterion = triplet_criterion
    reid_trainer = reidTrainer(opt, model, optimizer, use_criterion,
                               summary_writer, mx.gpu(0))
    reid_evaluator = reidEvaluator(model, mx.gpu(0))
    # start training
    best_rank1 = -np.inf
    best_epoch = 0
    for epoch in range(start_epoch, opt.max_epoch):
        if opt.step_size > 0:
            adjust_lr(optimizer, epoch + 1)
        reid_trainer.train(epoch, trainloader)
        # skip if not save model
        # Evaluate/checkpoint every eval_step epochs and always on the last.
        if opt.eval_step > 0 and (epoch + 1) % opt.eval_step == 0 or (
                epoch + 1) == opt.max_epoch:
            rank1 = reid_evaluator.evaluate(queryloader, galleryloader)
            is_best = rank1 > best_rank1
            if is_best:
                best_rank1 = rank1
                best_epoch = epoch + 1
            state_dict = {'model': model, 'epoch': epoch}
            save_checkpoint(state_dict,
                            is_best=is_best,
                            save_dir=opt.save_dir,
                            filename='checkpoint_ep' + str(epoch + 1) +
                            '.params')
    print('Best rank-1 {:.1%}, achived at epoch {}'.format(
        best_rank1, best_epoch))
def trainer(data_pth): seed = 0 # dataset options height = 128 width = 128 # optimization options optim = 'Adam' max_epoch = 1 train_batch = 64 test_batch = 64 lr = 0.1 step_size = 40 gamma = 0.1 weight_decay = 5e-4 momentum = 0.9 test_margin = 10.0 margin = 1.0 num_instances = 4 num_gpu = 1 # model options last_stride = 1 pretrained_model = 'model/resnet50-19c8e357.pth' # miscs print_freq = 20 eval_step = 1 save_dir = 'model/pytorch-ckpt/' workers = 1 start_epoch = 0 torch.manual_seed(seed) use_gpu = torch.cuda.is_available() if use_gpu: print('currently using GPU') cudnn.benchmark = True torch.cuda.manual_seed_all(seed) else: print('currently using cpu') pin_memory = True if use_gpu else False print('initializing dataset {}'.format('Tableware')) dataset = Tableware(data_pth) trainloader = DataLoader(ImageData(dataset.train, TrainTransform(height, width)), batch_size=train_batch, num_workers=workers, pin_memory=pin_memory, drop_last=True) testloader = DataLoader(ImageData(dataset.test, TestTransform(height, width)), batch_size=test_batch, num_workers=workers, pin_memory=pin_memory, drop_last=True) model, optim_policy = get_baseline_model(model_path=pretrained_model) print('model size: {:.5f}M'.format( sum(p.numel() for p in model.parameters()) / 1e6)) tri_criterion = TripletLoss(margin) # get optimizer optimizer = torch.optim.Adam(optim_policy, lr=lr, weight_decay=weight_decay) def adjust_lr(optimizer, ep): if ep < 20: lr = 1e-4 * (ep + 1) / 2 elif ep < 80: lr = 1e-3 * num_gpu elif ep < 180: lr = 1e-4 * num_gpu elif ep < 300: lr = 1e-5 * num_gpu elif ep < 320: lr = 1e-5 * 0.1**((ep - 320) / 80) * num_gpu elif ep < 400: lr = 1e-6 elif ep < 480: lr = 1e-4 * num_gpu else: lr = 1e-5 * num_gpu for p in optimizer.param_groups: p['lr'] = lr if use_gpu: model = nn.DataParallel(model).cuda() evaluator = Evaluator(model) # start training best_acc = -np.inf best_epoch = 0 for epoch in range(start_epoch, max_epoch): if step_size > 0: adjust_lr(optimizer, epoch + 1) 
train(model, optimizer, tri_criterion, epoch, print_freq, trainloader) # skip if not save model if eval_step > 0 and (epoch + 1) % eval_step == 0 or (epoch + 1) == max_epoch: acc = evaluator.evaluate(testloader, test_margin) is_best = acc > best_acc if is_best: best_acc = acc best_epoch = epoch + 1 if use_gpu: state_dict = model.module.state_dict() else: state_dict = model.state_dict() save_checkpoint({ 'state_dict': state_dict, 'epoch': epoch + 1, }, is_best=is_best, save_dir=save_dir, filename='checkpoint_ep' + str(epoch + 1) + '.pth.tar') print('Best accuracy {:.1%}, achieved at epoch {}'.format( best_acc, best_epoch))
def train(**kwargs):
    """Train a ReID model ('distance', 'triplet', or 'softmax' variants).

    kwargs override fields of the global `opt` config. Side effects:
    redirects stdout to a log file, writes TensorBoard summaries and
    checkpoints under opt.save_dir. Optionally warm-starts from
    opt.model_path (a .tar checkpoint) with the classifier stripped.
    """
    #### Part 1 : Initialization
    opt._parse(kwargs)
    torch.backends.cudnn.deterministic = True
    # set random seed and cudnn benchmark
    #torch.manual_seed(opt.seed)
    #random.seed(opt.seed)
    #np.random.seed(opt.seed)
    use_gpu = torch.cuda.is_available()
    sys.stdout = Logger(osp.join(opt.save_dir, 'log_train.txt'))
    print('=========user config==========')
    pprint(opt._state_dict())
    print('============end===============')
    if use_gpu:
        print('currently using GPU')
        cudnn.benchmark = True
        torch.cuda.manual_seed_all(opt.seed)
    else:
        print('currently using cpu')
    #### Part 2 : Preparing Data
    print('initializing train dataset {}'.format(opt.trainset))
    train_dataset = data_manager.init_dataset(name=opt.trainset)
    print('initializing test dataset {}'.format(opt.testset))
    test_dataset = data_manager.init_dataset(name=opt.testset)
    pin_memory = True if use_gpu else False
    # NOTE(review): pin_memory is immediately forced to False, making the
    # line above dead — confirm whether pinning was disabled deliberately.
    pin_memory = False
    summary_writer = SummaryWriter(osp.join(opt.save_dir, 'tensorboard_log'))
    collateFn = NormalCollateFn()
    # Two identity-aware batch composition strategies for metric learning.
    if opt.sampler == "randomidentity":
        trainloader = DataLoader(
            data_manager.init_datafolder(
                opt.trainset,
                train_dataset.train,
                TrainTransform(opt.height,
                               opt.width,
                               random_erase=opt.with_randomerase),
                if_train=True),
            sampler=RandomIdentitySampler(train_dataset.train,
                                          opt.num_instances),
            batch_size=opt.train_batch,
            num_workers=opt.workers,
            pin_memory=pin_memory,
            drop_last=True,
            collate_fn=collateFn,
        )
    elif opt.sampler == "randomidentitycamera":
        trainloader = DataLoader(
            data_manager.init_datafolder(
                opt.trainset,
                train_dataset.train,
                TrainTransform(opt.height,
                               opt.width,
                               random_erase=opt.with_randomerase),
                if_train=True),
            batch_sampler=RandomIdentityCameraSampler(train_dataset.train,
                                                      opt.num_instances,
                                                      opt.train_batch),
            num_workers=opt.workers,
            pin_memory=pin_memory,
            collate_fn=collateFn,
        )
    queryloader = DataLoader(data_manager.init_datafolder(opt.testset,
                                                          test_dataset.query,
                                                          TestTransform(
                                                              opt.height,
                                                              opt.width),
                                                          if_train=False),
                             batch_size=opt.test_batch,
                             num_workers=opt.workers,
                             pin_memory=pin_memory)
    galleryloader = DataLoader(data_manager.init_datafolder(
        opt.testset,
        test_dataset.gallery,
        TestTransform(opt.height, opt.width),
        if_train=False),
                               batch_size=opt.test_batch,
                               num_workers=opt.workers,
                               pin_memory=pin_memory)
    #### Part 3 : Preparing Backbone Network
    print('initializing model ...')
    if opt.model_name in ['triplet', 'distance']:
        model, optim_policy = get_baseline_model(num_classes=None,
                                                 model='triplet')
    elif opt.model_name in ["softmax"]:
        model, optim_policy = get_baseline_model(train_dataset.num_train_pids,
                                                 model='softmax',
                                                 drop_prob=opt.drop)
    else:
        assert False, "unknown model name"
    # Optional warm start from a checkpoint, stripping classifier weights
    # (and any opt.keep_layer layers) so shapes cannot clash.
    if (not opt.model_path == 'zero') and 'tar' in opt.model_path:
        print('load pretrain reid model......' + opt.model_path)
        ckpt = torch.load(opt.model_path)
        # remove classifer
        tmp = dict()
        for k, v in ckpt['state_dict'].items():
            if opt.keep_layer:
                for i in opt.keep_layer:
                    if 'layer' + str(i) in k:
                        #print(k+" skip....")
                        # NOTE(review): this `continue` only advances the
                        # inner loop over keep_layer, not the outer filter —
                        # the key is still copied below; confirm intent.
                        continue
            if opt.keepfc or ('fc' not in k and 'classifier' not in k):
                tmp[k] = v
        ckpt['state_dict'] = tmp
        model.load_state_dict(ckpt['state_dict'], strict=False)
    print('model size: {:.5f}M'.format(
        sum(p.numel() for p in model.parameters()) / 1e6))
    #### Part 4: Preparing Loss Functions
    if opt.margin1 is not None:
        distance_loss = DistanceLoss(margin=(opt.margin1, opt.margin2))
    else:
        distance_loss = DistanceLoss()
    tri_loss = TripletLoss(margin=opt.margin)
    xent_loss = nn.CrossEntropyLoss()
    # Running meters printed and reset once per epoch.
    vis = dict()
    vis['tri_acc1'] = AverageMeter()
    vis['tri_acc2'] = AverageMeter()
    vis['cls_accuracy'] = AverageMeter()
    vis['cls_loss'] = AverageMeter()

    def dist_criterion(feat,
                       targets,
                       cameras,
                       model=None,
                       paths=None,
                       epoch=0):
        # camera-aware distance loss; tracks both triplet accuracies
        dis_loss, tri_acc1, tri_acc2 = distance_loss(feat,
                                                     targets,
                                                     cameras,
                                                     model,
                                                     paths,
                                                     epoch=epoch)
        vis['tri_acc1'].update(float(tri_acc1))
        vis['tri_acc2'].update(float(tri_acc2))
        return dis_loss

    def triplet_criterion(feat, targets):
        triplet_loss, tri_accuracy, _, _ = tri_loss(feat, targets)
        vis['tri_acc1'].update(float(tri_accuracy))
        return triplet_loss

    def cls_criterion(cls_scores, targets):
        # cross-entropy plus batch accuracy bookkeeping
        cls_loss = xent_loss(cls_scores, targets)
        _, preds = torch.max(cls_scores.data, 1)
        corrects = float(torch.sum(preds == targets.data))
        vis['cls_accuracy'].update(float(corrects / opt.train_batch))
        vis['cls_loss'].update(float(cls_loss))
        return cls_loss

    #### Part 5: Preparing Optimizer and Trainer
    optimizer, adjust_lr = get_optimizer_strategy(opt.model_name, optim_policy,
                                                  opt)
    start_epoch = opt.start_epoch
    if use_gpu:
        model = nn.DataParallel(model).cuda()
        #model=model.cuda()
    # get trainer and evaluatori
    if opt.model_name == "distance":
        reid_trainer = tripletTrainer(opt,
                                      model,
                                      optimizer,
                                      dist_criterion,
                                      summary_writer,
                                      need_cam=True)
    elif opt.model_name == 'triplet' or opt.model_name == 'triplet_fc':
        reid_trainer = tripletTrainer(opt, model, optimizer,
                                      triplet_criterion, summary_writer)
    elif opt.model_name == 'softmax':
        reid_trainer = clsTrainer(opt, model, optimizer, cls_criterion,
                                  summary_writer)
    else:
        print("Error: Unknown model name {}".format(opt.model_name))
    reid_evaluator = evaluator_manager.init_evaluator(opt.testset,
                                                      model,
                                                      flip=True)
    #### Part 6 : Training
    best_rank1 = -np.inf
    best_epoch = 0
    for epoch in range(start_epoch, opt.max_epoch):
        if opt.step_size > 0:
            current_lr = adjust_lr(optimizer, epoch)
        reid_trainer.train(epoch, trainloader)
        # Dump and reset the per-epoch meters.
        for k, v in vis.items():
            print("{}:{}".format(k, v.mean))
            v.reset()
        # Always checkpoint the final epoch, even when not evaluating.
        if (epoch + 1) == opt.max_epoch:
            # Unwrap DataParallel so keys have no 'module.' prefix.
            if use_gpu and opt.num_gpu > 1:
                state_dict = model.module.state_dict()
            else:
                state_dict = model.state_dict()
            save_checkpoint({
                'state_dict': state_dict,
                'epoch': epoch + 1,
            },
                            is_best=False,
                            save_dir=opt.save_dir,
                            filename='checkpoint_ep' + str(epoch + 1) +
                            '.pth.tar')
        # skip if not save model
        if (opt.eval_step > 0 and (epoch + 1) % opt.eval_step == 0 and
                epoch >= 0 or (epoch + 1) == opt.max_epoch):
            #print('Test on '+opt.testset)
            #rank1 = reid_evaluator.evaluate(queryloader, galleryloader,normalize=opt.with_normalize)
            print('Test on ' + opt.trainset)
            if use_gpu and opt.num_gpu > 1:
                state_dict = model.module.state_dict()
            else:
                state_dict = model.state_dict()
            # NOTE(review): this checkpoint is written again after evaluation
            # below with the same filename — likely redundant; confirm.
            save_checkpoint({
                'state_dict': state_dict,
                'epoch': epoch + 1,
            },
                            is_best=False,
                            save_dir=opt.save_dir,
                            filename='checkpoint_ep' + str(epoch + 1) +
                            '.pth.tar')
            rank1, mAP = reid_evaluator.evaluate(queryloader,
                                                 galleryloader,
                                                 normalize=opt.with_normalize)
            is_best = rank1 > best_rank1
            if is_best:
                best_rank1 = rank1
                best_epoch = epoch + 1
            save_checkpoint(
                {
                    'state_dict': state_dict,
                    'epoch': epoch + 1,
                },
                is_best=False,
                save_dir=opt.save_dir,
                filename='checkpoint_ep' + str(epoch + 1) + '.pth.tar')
    print('Best rank-1 {:.1%}, achieved at epoch {}'.format(
        best_rank1, best_epoch))
def test(**kwargs):
    """Evaluate a trained softmax baseline on opt.testset.

    Loads model_best.pth.tar from opt.save_dir (falling back to the
    opt.testepoch checkpoint), strips classifier weights, and runs the
    query/gallery evaluation. kwargs override the global `opt` config.
    """
    opt._parse(kwargs)
    # set random seed and cudnn benchmark
    torch.manual_seed(opt.seed)
    use_gpu = torch.cuda.is_available()
    sys.stdout = Logger(
        osp.join(opt.save_dir,
                 'log_test_{}_{}.txt'.format(opt.testset, opt.testepoch)))
    if use_gpu:
        print('currently using GPU {}'.format(opt.device_ids))
        cudnn.benchmark = True
        torch.cuda.manual_seed_all(opt.seed)
        # NOTE(review): setting CUDA_VISIBLE_DEVICES after CUDA has already
        # been queried may have no effect — confirm device selection.
        os.environ["CUDA_VISIBLE_DEVICES"] = str(opt.device_ids)
    else:
        print('currently using cpu')
    print('initializing dataset {}'.format(opt.testset))
    dataset = data_manager.init_dataset(name=opt.testset)
    pin_memory = True if use_gpu else False
    queryloader = DataLoader(data_manager.init_datafolder(
        opt.testset,
        dataset.query,
        TestTransform(opt.height, opt.width),
        if_train=False),
                             batch_size=opt.test_batch,
                             num_workers=opt.workers,
                             pin_memory=pin_memory)
    galleryloader = DataLoader(data_manager.init_datafolder(
        opt.testset,
        dataset.gallery,
        TestTransform(opt.height, opt.width),
        if_train=False),
                               batch_size=opt.test_batch,
                               num_workers=opt.workers,
                               pin_memory=pin_memory)
    print('loading model ...')
    model, optim_policy = get_baseline_model(model="softmax",
                                             model_path=opt.model_path)
    best_model_path = os.path.join(opt.save_dir, 'model_best.pth.tar')
    # Fall back to the per-epoch checkpoint when no best model exists.
    if not os.path.exists(best_model_path):
        best_model_path = os.path.join(
            opt.save_dir,
            "{}_checkpoint_ep{}.pth.tar".format(opt.testepoch, opt.testepoch))
    # Reuse use_gpu instead of querying CUDA availability a second time.
    if use_gpu:
        ckpt = torch.load(best_model_path)
    else:
        ckpt = torch.load(best_model_path, map_location="cpu")
    # remove classifer: evaluation only needs the feature extractor.
    ckpt['state_dict'] = {
        k: v
        for k, v in ckpt['state_dict'].items()
        if 'fc' not in k and 'classifier' not in k
    }
    print(model)
    # (debug dump of the full checkpoint dict removed — it flooded the log)
    model.load_state_dict(ckpt['state_dict'], strict=False)
    print('model size: {:.5f}M'.format(
        sum(p.numel() for p in model.parameters()) / 1e6))
    if use_gpu:
        model = nn.DataParallel(model).cuda()
    reid_evaluator = evaluator_manager.init_evaluator(opt.testset,
                                                      model,
                                                      flip=True)
    reid_evaluator.evaluate(queryloader,
                            galleryloader,
                            normalize=opt.with_normalize,
                            rerank=False)