def _evaluate_for_validation(self, validationloader=None):
    """Compute the validation loss over the entire validation set.

    Puts the model in eval mode, accumulates features and person IDs for
    every batch, evaluates ``self.validation_criterion`` once on the full
    set, prints a summary and returns the loss value.

    Args:
        validationloader: iterable yielding batches that
            ``self._parse_data_for_train`` can unpack into (imgs, pids).

    Returns:
        float: average of the validation-loss meter.
    """
    losses_t = AverageMeter()
    self.model.eval()
    print('Checking performance on validation set ...')

    all_features = torch.tensor([])
    all_pids = torch.tensor([])
    if self.use_gpu:
        # BUG FIX: previously .cuda() was called unconditionally, which
        # crashes on CPU-only machines even when self.use_gpu is False.
        all_features = all_features.cuda()

    # no_grad: only forward passes are needed here; keeping autograd
    # history while concatenating features of the whole set wastes memory.
    with torch.no_grad():
        for batch_idx, data in enumerate(validationloader):
            imgs, pids = self._parse_data_for_train(data)
            if self.use_gpu:
                imgs = imgs.cuda()
            features = self.model(imgs)
            all_features = torch.cat((all_features, features), dim=0)
            all_pids = torch.cat((all_pids, pids.float()))

    if self.use_gpu:
        all_pids = all_pids.cuda()
    loss_t = self._compute_loss(
        self.validation_criterion, all_features, all_pids
    )
    # BUG FIX: the meter previously weighted the loss by `pids` — the last
    # batch only (and a NameError on an empty loader). The loss covers the
    # whole validation set, so weight by the total sample count.
    losses_t.update(loss_t.item(), all_pids.size(0))

    print()
    print('Validation results:')
    print('Loss_t {loss_t.avg:.4f}\t'
          'Lr {lr:.6f}'.format(
              loss_t=losses_t,
              lr=self.optimizer.param_groups[0]['lr']
          )
          )
    print()
    return losses_t.avg
def train(self, print_freq=10, fixbase_epoch=0, open_layers=None):
    """Run one training epoch over ``self.train_loader``.

    Progress is printed every ``print_freq`` batches; timing, loss and
    learning-rate scalars are written to the summary writer (when one is
    attached) on every batch. The LR schedule is stepped at the end.
    """
    loss_meter = MetricMeter()
    iter_timer = AverageMeter()
    load_timer = AverageMeter()

    self.set_model_mode('train')
    self.two_stepped_transfer_learning(
        self.epoch, fixbase_epoch, open_layers
    )

    self.num_batches = len(self.train_loader)
    tick = time.time()
    for self.batch_idx, (imgs, pids) in enumerate(self.train_loader):
        load_timer.update(time.time() - tick)
        imgs, pids = self.transform_tr(imgs, pids)
        loss_summary = self.forward_backward(imgs, pids)
        iter_timer.update(time.time() - tick)
        loss_meter.update(loss_summary)

        if (self.batch_idx + 1) % print_freq == 0:
            # remaining batches in this epoch plus all future epochs
            batches_left = (
                self.num_batches - (self.batch_idx + 1)
                + (self.max_epoch - (self.epoch + 1)) * self.num_batches
            )
            eta_str = str(
                datetime.timedelta(seconds=int(iter_timer.avg * batches_left))
            )
            print(
                'epoch: [{0}/{1}][{2}/{3}]\t'
                'time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                'data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                'eta {eta}\t'
                '{losses}\t'
                'lr {lr:.6f}'.format(
                    self.epoch + 1,
                    self.max_epoch,
                    self.batch_idx + 1,
                    self.num_batches,
                    batch_time=iter_timer,
                    data_time=load_timer,
                    eta=eta_str,
                    losses=loss_meter,
                    lr=self.get_current_lr()
                )
            )

        if self.writer is not None:
            n_iter = self.epoch * self.num_batches + self.batch_idx
            self.writer.add_scalar('Train/time', iter_timer.avg, n_iter)
            self.writer.add_scalar('Train/data', load_timer.avg, n_iter)
            for name, meter in loss_meter.meters.items():
                self.writer.add_scalar('Train/' + name, meter.avg, n_iter)
            self.writer.add_scalar(
                'Train/lr', self.get_current_lr(), n_iter
            )

        tick = time.time()

    self.update_lr()
def __init__(self, datamanager, model, optimizer=None, scheduler=None, use_gpu=True):
    """Store the training components and validate the model type.

    Args:
        datamanager: data manager providing train/test loaders.
        model (nn.Module): network to train/evaluate.
        optimizer: optional optimizer instance.
        scheduler: optional LR scheduler instance.
        use_gpu (bool): request GPU usage; honored only when CUDA is
            actually available.
    """
    self.datamanager = datamanager
    self.model = model
    self.optimizer = optimizer
    self.scheduler = scheduler
    # GPU is used only when both requested and available
    self.use_gpu = torch.cuda.is_available() and use_gpu
    self.writer = None
    self.map_v = AverageMeter()
    self.rank1_v = AverageMeter()

    # check attributes
    if not isinstance(self.model, nn.Module):
        raise TypeError('model must be an instance of nn.Module')
def train(self, epoch, max_epoch, trainloader, fixbase_epoch=0, open_layers=None, print_freq=10):
    """Train the model for one epoch.

    Args:
        epoch (int): zero-based index of the current epoch.
        max_epoch (int): total number of training epochs.
        trainloader: iterable of training batches.
        fixbase_epoch (int): during the first ``fixbase_epoch`` epochs only
            ``open_layers`` are trained; the rest of the model is frozen.
        open_layers (list or None): layers kept trainable in the fix-base phase.
        print_freq (int): print progress every ``print_freq`` batches.
    """
    losses = AverageMeter()
    base_losses = AverageMeter()
    # kept because its (never-updated) average is still logged to the writer
    density_losses = AverageMeter()
    accs = AverageMeter()
    batch_time = AverageMeter()
    data_time = AverageMeter()

    self.model.train()

    # two-stepped transfer learning: warm up only the newly added layers first
    if (epoch + 1) <= fixbase_epoch and open_layers is not None:
        print('* Only train {} (epoch: {}/{})'.format(
            open_layers, epoch + 1, fixbase_epoch))
        open_specified_layers(self.model, open_layers)
    else:
        open_all_layers(self.model)

    num_batches = len(trainloader)
    end = time.time()
    for batch_idx, data in enumerate(trainloader):
        data_time.update(time.time() - end)

        imgs, pids = self._parse_data_for_train(data)
        if self.use_gpu:
            imgs = imgs.cuda()
            pids = pids.cuda()

        output = self.model(imgs)
        # model returns a pair; only the first element (logits) is used here
        output, v = output

        base_loss = self._compute_loss(self.criterion, output, pids)
        loss = base_loss

        # BUG FIX: gradients must be cleared before each backward pass;
        # without zero_grad() they silently accumulate across batches
        # (every sibling train() method in this file clears them).
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        batch_time.update(time.time() - end)
        losses.update(loss.item(), pids.size(0))
        base_losses.update(base_loss.item(), pids.size(0))
        accs.update(metrics.accuracy(output, pids)[0].item())

        if (batch_idx + 1) % print_freq == 0:
            # estimate remaining time
            eta_seconds = batch_time.avg * (num_batches - (batch_idx + 1) + (max_epoch - (epoch + 1)) * num_batches)
            eta_str = str(datetime.timedelta(seconds=int(eta_seconds)))
            print('Epoch: [{0}/{1}][{2}/{3}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Acc {acc.val:.2f} ({acc.avg:.2f})\t'
                  'Lr {lr:.6f}\t'
                  'eta {eta}'.format(
                      epoch + 1, max_epoch, batch_idx + 1, num_batches,
                      batch_time=batch_time,
                      data_time=data_time,
                      loss=losses,
                      acc=accs,
                      lr=self.optimizer.param_groups[0]['lr'],
                      eta=eta_str))

        if self.writer is not None:
            n_iter = epoch * num_batches + batch_idx
            self.writer.add_scalar('Train/Time', batch_time.avg, n_iter)
            self.writer.add_scalar('Train/Data', data_time.avg, n_iter)
            self.writer.add_scalar('Train/Loss', losses.avg, n_iter)
            self.writer.add_scalar('Train/Base_Loss', base_losses.avg, n_iter)
            self.writer.add_scalar('Train/Density_loss', density_losses.avg, n_iter)
            self.writer.add_scalar('Train/Acc', accs.avg, n_iter)
            self.writer.add_scalar('Train/Lr', self.optimizer.param_groups[0]['lr'], n_iter)

        end = time.time()

    if self.scheduler is not None:
        self.scheduler.step()
def _evaluate(self, epoch, dataset_name='', queryloader=None,
              galleryloader=None, dist_metric='euclidean',
              normalize_feature=False, visrank=False, visrank_topk=10,
              save_dir='', use_metric_cuhk03=False, ranks=(1, 5, 10, 20),
              rerank=False, iteration=0):
    """Evaluate the model on a query/gallery split.

    Extracts features for both sets, computes the query-gallery distance
    matrix (optionally L2-normalized and/or re-ranked), and reports CMC
    ranks and mAP, logging them to the writer when attached.

    Returns:
        Rank-1 accuracy (``cmc[0]``).
    """
    batch_time = AverageMeter()

    def _extract(loader, desc):
        # Shared feature-extraction loop for query and gallery
        # (previously duplicated verbatim for both sets).
        feats, pid_list, camid_list = [], [], []
        for batch_idx, data in tqdm(enumerate(loader), desc):
            imgs, pids, camids = self._parse_data_for_eval(data)
            if self.use_gpu:
                imgs = imgs.cuda()
            end = time.time()
            # data[3] is an extra per-batch input forwarded to the model;
            # its meaning is defined by the loader -- TODO confirm.
            features = self._extract_features(imgs, data[3])
            batch_time.update(time.time() - end)
            feats.append(features.data.cpu())
            pid_list.extend(pids)
            camid_list.extend(camids)
        return torch.cat(feats, 0), np.asarray(pid_list), np.asarray(camid_list)

    print('Extracting features from query set...')
    qf, q_pids, q_camids = _extract(queryloader, 'Processing query...')
    print('Done, obtained {}-by-{} matrix'.format(qf.size(0), qf.size(1)))

    print('Extracting features from gallery set...')
    gf, g_pids, g_camids = _extract(galleryloader, 'Processing gallery...')
    print('Done, obtained {}-by-{} matrix'.format(gf.size(0), gf.size(1)))

    print('Speed: {:.4f} sec/batch'.format(batch_time.avg))

    if normalize_feature:
        print('Normalizing features with L2 norm...')
        qf = F.normalize(qf, p=2, dim=1)
        gf = F.normalize(gf, p=2, dim=1)

    print('Computing distance matrix with metric={}...'.format(dist_metric))
    distmat = metrics.compute_distance_matrix(qf, gf, dist_metric)
    distmat = distmat.numpy()

    if rerank:
        print('Applying person re-ranking ...')
        distmat_qq = metrics.compute_distance_matrix(qf, qf, dist_metric)
        distmat_gg = metrics.compute_distance_matrix(gf, gf, dist_metric)
        distmat = re_ranking(distmat, distmat_qq, distmat_gg)

    print('Computing CMC and mAP ...')
    cmc, mAP = metrics.evaluate_rank(
        distmat,
        q_pids,
        g_pids,
        q_camids,
        g_camids,
        use_metric_cuhk03=use_metric_cuhk03
    )

    if self.writer is not None:
        self.writer.add_scalar('Val/{}/mAP'.format(dataset_name), mAP, epoch + 1)
        for r in ranks:
            self.writer.add_scalar('Val/{}/Rank-{}'.format(dataset_name, r), cmc[r - 1], epoch + 1)

    print('** Results **')
    print('mAP: {:.2%}'.format(mAP))
    print('CMC curve')
    for r in ranks:
        print('Rank-{:<3}: {:.2%}'.format(r, cmc[r - 1]))

    if visrank:
        visualize_ranked_results(
            distmat,
            self.datamanager.return_testdataset_by_name(dataset_name),
            self.datamanager.data_type,
            width=self.datamanager.width,
            height=self.datamanager.height,
            save_dir=osp.join(save_dir, 'visrank_' + dataset_name),
            topk=visrank_topk
        )
    return cmc[0]
def _evaluate(self, epoch, dataset_name='', queryloader=None, galleryloader=None, dist_metric='euclidean', visrank=False, visrank_topk=20, save_dir='', use_metric_cuhk03=False, ranks=(1, 5, 10, 20)):
    """Evaluate the model on a query/gallery split and report CMC + mAP.

    Args:
        epoch (int): current epoch (used only to name the visrank folder).
        dataset_name (str): dataset key for visualization output.
        queryloader / galleryloader: evaluation data loaders.
        dist_metric (str): distance metric passed to
            ``metrics.compute_distance_matrix``.
        visrank (bool): save ranked-result visualizations.
        visrank_topk (int): number of ranks to visualize.
        save_dir (str): root directory for visualizations.
        use_metric_cuhk03 (bool): use the CUHK03-specific CMC protocol.
        ranks (tuple): CMC ranks to print.
            NOTE: changed from a mutable default list to a tuple
            (mutable default arguments are a Python pitfall).

    Returns:
        Rank-1 accuracy (``cmc[0]``).
    """
    batch_time = AverageMeter()
    self.model.eval()

    def _extract(loader):
        # Shared feature-extraction loop (was duplicated for both sets).
        feats, pid_list, camid_list = [], [], []
        for batch_idx, data in enumerate(loader):
            imgs, pids, camids = self._parse_data_for_eval(data)
            if self.use_gpu:
                imgs = imgs.cuda()
            end = time.time()
            features = self._extract_features(imgs)
            batch_time.update(time.time() - end)
            feats.append(features.data.cpu())
            pid_list.extend(pids)
            camid_list.extend(camids)
        return torch.cat(feats, 0), np.asarray(pid_list), np.asarray(camid_list)

    print('Extracting features from query set ...')
    qf, q_pids, q_camids = _extract(queryloader)
    print('Done, obtained {}-by-{} matrix'.format(qf.size(0), qf.size(1)))

    print('Extracting features from gallery set ...')
    # NOTE: a dead `end = time.time()` that was immediately overwritten
    # inside the loop has been removed.
    gf, g_pids, g_camids = _extract(galleryloader)
    print('Done, obtained {}-by-{} matrix'.format(gf.size(0), gf.size(1)))

    print('Speed: {:.4f} sec/batch'.format(batch_time.avg))

    distmat = metrics.compute_distance_matrix(qf, gf, dist_metric)
    distmat = distmat.numpy()

    print('Computing CMC and mAP ...')
    cmc, mAP = metrics.evaluate_rank(distmat, q_pids, g_pids, q_camids, g_camids, use_metric_cuhk03=use_metric_cuhk03)

    print('** Results **')
    print('mAP: {:.1%}'.format(mAP))
    print('CMC curve')
    for r in ranks:
        print('Rank-{:<3}: {:.1%}'.format(r, cmc[r - 1]))

    if visrank:
        visualize_ranked_results(
            distmat,
            self.datamanager.return_testdataset_by_name(dataset_name),
            save_dir=osp.join(save_dir, 'visrank-' + str(epoch + 1), dataset_name),
            topk=visrank_topk)
    return cmc[0]
def _evaluate(
    self,
    dataset_name='',
    query_loader=None,
    gallery_loader=None,
    dist_metric='euclidean',
    normalize_feature=False,
    visrank=False,
    visrank_topk=10,
    save_dir='',
    use_metric_cuhk03=False,
    ranks=(1, 5, 10, 20),
    rerank=False
):
    """Evaluate the model on a query/gallery split.

    Extracts features for both sets with a shared inner loop, computes
    the (optionally normalized and re-ranked) distance matrix, and
    reports CMC ranks and mAP.

    NOTE: ``ranks`` default changed from a mutable list to a tuple
    (mutable default arguments are a Python pitfall); a leftover debug
    ``print(qf.shape)`` was removed and the 'Normalzing' typo fixed.

    Returns:
        tuple: (rank-1 accuracy, mAP).
    """
    batch_time = AverageMeter()

    def _feature_extraction(data_loader):
        # Shared feature-extraction loop for query and gallery sets.
        f_, pids_, camids_ = [], [], []
        for batch_idx, data in enumerate(data_loader):
            imgs, pids, camids = self.parse_data_for_eval(data)
            if self.use_gpu:
                imgs = imgs.cuda()
            end = time.time()
            features = self.extract_features(imgs)
            batch_time.update(time.time() - end)
            features = features.cpu().clone()
            f_.append(features)
            pids_.extend(pids)
            camids_.extend(camids)
        f_ = torch.cat(f_, 0)
        pids_ = np.asarray(pids_)
        camids_ = np.asarray(camids_)
        return f_, pids_, camids_

    print('Extracting features from query set ...')
    qf, q_pids, q_camids = _feature_extraction(query_loader)
    print('Done, obtained {}-by-{} matrix'.format(qf.size(0), qf.size(1)))

    print('Extracting features from gallery set ...')
    gf, g_pids, g_camids = _feature_extraction(gallery_loader)
    print('Done, obtained {}-by-{} matrix'.format(gf.size(0), gf.size(1)))

    print('Speed: {:.4f} sec/batch'.format(batch_time.avg))

    if normalize_feature:
        print('Normalizing features with L2 norm ...')
        qf = F.normalize(qf, p=2, dim=1)
        gf = F.normalize(gf, p=2, dim=1)

    print(
        'Computing distance matrix with metric={} ...'.format(dist_metric)
    )
    distmat = metrics.compute_distance_matrix(qf, gf, dist_metric)
    distmat = distmat.numpy()

    if rerank:
        print('Applying person re-ranking ...')
        distmat_qq = metrics.compute_distance_matrix(qf, qf, dist_metric)
        distmat_gg = metrics.compute_distance_matrix(gf, gf, dist_metric)
        distmat = re_ranking(distmat, distmat_qq, distmat_gg)

    print('Computing CMC and mAP ...')
    cmc, mAP = metrics.evaluate_rank(
        distmat,
        q_pids,
        g_pids,
        q_camids,
        g_camids,
        use_metric_cuhk03=use_metric_cuhk03
    )

    print('** Results **')
    print('mAP: {:.1%}'.format(mAP))
    print('CMC curve')
    for r in ranks:
        print('Rank-{:<3}: {:.1%}'.format(r, cmc[r - 1]))

    if visrank:
        visualize_ranked_results(
            distmat,
            self.datamanager.fetch_test_loaders(dataset_name),
            self.datamanager.data_type,
            width=self.datamanager.width,
            height=self.datamanager.height,
            save_dir=osp.join(save_dir, 'visrank_' + dataset_name),
            topk=visrank_topk
        )
    return cmc[0], mAP
def train(self, print_freq=10, fixbase_epoch=0, open_layers=None, lr_finder=False, perf_monitor=None, stop_callback=None):
    """Run one training epoch over ``self.train_loader``.

    Args:
        print_freq (int): log progress every ``print_freq`` batches
            (and always on the last batch of the epoch).
        fixbase_epoch (int): number of warm-up epochs during which only
            ``open_layers`` are trained.
        open_layers: layers kept trainable during the warm-up phase.
        lr_finder (bool): when True, suppresses console/tensorboard
            logging, perf-monitor callbacks and the EMA update
            (used while sweeping learning rates).
        perf_monitor: optional object notified at batch begin/end.
        stop_callback: optional object whose ``check_stop()`` aborts the
            epoch early when it returns True.

    Returns:
        Average of the accumulated 'loss' meter for the epoch.
    """
    losses = MetricMeter()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    accuracy = AverageMeter()

    self.set_model_mode('train')

    if not self._should_freeze_aux_models(self.epoch):
        # NB: it should be done before `two_stepped_transfer_learning`
        # to give possibility to freeze some layers in the unlikely event
        # that `two_stepped_transfer_learning` is used together with nncf
        self._unfreeze_aux_models()

    self.two_stepped_transfer_learning(self.epoch, fixbase_epoch, open_layers)

    if self._should_freeze_aux_models(self.epoch):
        self._freeze_aux_models()

    self.num_batches = len(self.train_loader)
    end = time.time()
    for self.batch_idx, data in enumerate(self.train_loader):
        if perf_monitor and not lr_finder:
            perf_monitor.on_train_batch_begin(self.batch_idx)
        data_time.update(time.time() - end)
        if self.compression_ctrl:
            # keep the NNCF compression schedule in sync with the batch index
            self.compression_ctrl.scheduler.step(self.batch_idx)
        loss_summary, avg_acc = self.forward_backward(data)
        batch_time.update(time.time() - end)
        losses.update(loss_summary)
        accuracy.update(avg_acc)
        if perf_monitor and not lr_finder:
            perf_monitor.on_train_batch_end(self.batch_idx)
        # print on the usual cadence and also on the final batch of the epoch
        if not lr_finder and (((self.batch_idx + 1) % print_freq) == 0 or self.batch_idx == self.num_batches - 1):
            nb_this_epoch = self.num_batches - (self.batch_idx + 1)
            nb_future_epochs = (self.max_epoch - (self.epoch + 1)) * self.num_batches
            eta_seconds = batch_time.avg * (nb_this_epoch + nb_future_epochs)
            eta_str = str(datetime.timedelta(seconds=int(eta_seconds)))
            print('epoch: [{0}/{1}][{2}/{3}]\t'
                  'time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'cls acc {accuracy.val:.3f} ({accuracy.avg:.3f})\t'
                  'eta {eta}\t'
                  '{losses}\t'
                  'lr {lr:.6f}'.format(self.epoch + 1, self.max_epoch,
                                       self.batch_idx + 1, self.num_batches,
                                       batch_time=batch_time,
                                       data_time=data_time,
                                       accuracy=accuracy,
                                       eta=eta_str,
                                       losses=losses,
                                       lr=self.get_current_lr()))
        if self.writer is not None and not lr_finder:
            n_iter = self.epoch * self.num_batches + self.batch_idx
            self.writer.add_scalar('Train/time', batch_time.avg, n_iter)
            self.writer.add_scalar('Train/data', data_time.avg, n_iter)
            self.writer.add_scalar('Aux/lr', self.get_current_lr(), n_iter)
            self.writer.add_scalar('Accuracy/train', accuracy.avg, n_iter)
            for name, meter in losses.meters.items():
                self.writer.add_scalar('Loss/' + name, meter.avg, n_iter)
        end = time.time()
        self.current_lr = self.get_current_lr()
        if stop_callback and stop_callback.check_stop():
            break

    if not lr_finder and self.use_ema_decay:
        # fold the just-trained weights into the EMA shadow model
        self.ema_model.update(self.models[self.main_model_name])
    if self.per_batch_annealing:
        self.update_lr()
    return losses.meters['loss'].avg
def train(self, epoch, max_epoch, trainloader, fixbase_epoch=0, open_layers=None, print_freq=10):
    """Train a two-branch model for one epoch.

    The model emits two outputs per batch; the first is scored against the
    labels of the first half of the batch and the second against the
    second half, and the mean of the two losses drives the update.
    """
    meter_loss_a = AverageMeter()
    meter_loss_b = AverageMeter()
    meter_acc_a = AverageMeter()
    meter_acc_b = AverageMeter()
    timer_batch = AverageMeter()
    timer_data = AverageMeter()

    self.model.train()

    # fix-base warm-up: only the specified layers are trainable at first
    if (epoch + 1) <= fixbase_epoch and open_layers is not None:
        print('* Only train {} (epoch: {}/{})'.format(open_layers, epoch + 1, fixbase_epoch))
        open_specified_layers(self.model, open_layers)
    else:
        open_all_layers(self.model)

    num_batches = len(trainloader)
    tick = time.time()
    for batch_idx, data in enumerate(trainloader):
        timer_data.update(time.time() - tick)

        imgs, pids = self._parse_data_for_train(data)
        if self.use_gpu:
            imgs = imgs.cuda()
            pids = pids.cuda()

        self.optimizer.zero_grad()
        out_a, out_b = self.model(imgs)

        # split the batch's labels between the two branches
        total = imgs.size(0)
        half = total // 2
        pids_a, pids_b = pids[:half], pids[half:total]

        loss_a = self._compute_loss(self.criterion, out_a, pids_a)
        loss_b = self._compute_loss(self.criterion, out_b, pids_b)
        combined = (loss_a + loss_b) * 0.5
        combined.backward()
        self.optimizer.step()

        timer_batch.update(time.time() - tick)

        meter_loss_a.update(loss_a.item(), pids_a.size(0))
        meter_loss_b.update(loss_b.item(), pids_b.size(0))
        meter_acc_a.update(metrics.accuracy(out_a, pids_a)[0].item())
        meter_acc_b.update(metrics.accuracy(out_b, pids_b)[0].item())

        if (batch_idx + 1) % print_freq == 0:
            # estimate remaining time
            remaining = num_batches - (batch_idx + 1) + (max_epoch - (epoch + 1)) * num_batches
            eta_str = str(datetime.timedelta(seconds=int(timer_batch.avg * remaining)))
            print('Epoch: [{0}/{1}][{2}/{3}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss1 {loss1.val:.4f} ({loss1.avg:.4f})\t'
                  'Loss2 {loss2.val:.4f} ({loss2.avg:.4f})\t'
                  'Acc1 {acc1.val:.2f} ({acc1.avg:.2f})\t'
                  'Acc2 {acc2.val:.2f} ({acc2.avg:.2f})\t'
                  'Lr {lr:.6f}\t'
                  'eta {eta}'.format(
                      epoch + 1, max_epoch, batch_idx + 1, num_batches,
                      batch_time=timer_batch,
                      data_time=timer_data,
                      loss1=meter_loss_a,
                      loss2=meter_loss_b,
                      acc1=meter_acc_a,
                      acc2=meter_acc_b,
                      lr=self.optimizer.param_groups[0]['lr'],
                      eta=eta_str
                  ))

        if self.writer is not None:
            n_iter = epoch * num_batches + batch_idx
            self.writer.add_scalar('Train/Time', timer_batch.avg, n_iter)
            self.writer.add_scalar('Train/Data', timer_data.avg, n_iter)
            self.writer.add_scalar('Train/Loss1', meter_loss_a.avg, n_iter)
            self.writer.add_scalar('Train/Loss2', meter_loss_b.avg, n_iter)
            self.writer.add_scalar('Train/Acc1', meter_acc_a.avg, n_iter)
            self.writer.add_scalar('Train/Acc2', meter_acc_b.avg, n_iter)
            self.writer.add_scalar('Train/Lr', self.optimizer.param_groups[0]['lr'], n_iter)

        tick = time.time()

    if self.scheduler is not None:
        self.scheduler.step()
def train(
    self,
    epoch,
    max_epoch,
    writer,
    print_freq=10,
    fixbase_epoch=0,
    open_layers=None,
):
    """Train for one epoch with triplet + softmax losses on the source
    domain and MMD-based domain-alignment losses against a target-domain
    loader (``self.train_loader_t``), zipped batch-for-batch with
    ``self.train_loader``.

    NOTE(review): MMD losses are *computed and back-propagated* from
    epoch > 20, but their meters are only *updated* from epoch > 24 —
    confirm whether this 20/24 mismatch is intentional.
    """
    losses_triplet = AverageMeter()
    losses_softmax = AverageMeter()
    losses_mmd_bc = AverageMeter()
    losses_mmd_wc = AverageMeter()
    losses_mmd_global = AverageMeter()
    batch_time = AverageMeter()
    data_time = AverageMeter()

    self.model.train()
    if (epoch + 1) <= fixbase_epoch and open_layers is not None:
        print(
            '* Only train {} (epoch: {}/{})'.format(
                open_layers, epoch + 1, fixbase_epoch
            )
        )
        open_specified_layers(self.model, open_layers)
    else:
        open_all_layers(self.model)
    num_batches = len(self.train_loader)
    end = time.time()
    # -------------------------------------------------------------------------------------------------------------------- #
    # iterate source and target loaders in lockstep (epoch length is the shorter of the two)
    for batch_idx, (data, data_t) in enumerate(zip(self.train_loader, self.train_loader_t)):
        data_time.update(time.time() - end)
        imgs, pids = self._parse_data_for_train(data)
        if self.use_gpu:
            imgs = imgs.cuda()
            pids = pids.cuda()
        # target-domain labels are parsed but unused (unsupervised target)
        imgs_t, pids_t = self._parse_data_for_train(data_t)
        if self.use_gpu:
            imgs_t = imgs_t.cuda()
        self.optimizer.zero_grad()
        outputs, features = self.model(imgs)
        outputs_t, features_t = self.model(imgs_t)
        loss_t = self._compute_loss(self.criterion_t, features, pids)
        loss_x = self._compute_loss(self.criterion_x, outputs, pids)
        loss = loss_t + loss_x
        if epoch > 20:
            # domain alignment: within-class / between-class / global MMD
            loss_mmd_wc, loss_mmd_bc, loss_mmd_global = self._compute_loss(self.criterion_mmd, features, features_t)
            #loss = loss_t + loss_x + loss_mmd_bc + loss_mmd_wc
            loss = loss_t + loss_x + loss_mmd_global + loss_mmd_bc + loss_mmd_wc
            # NOTE(review): dead experimental branch (condition is `False`);
            # kept as-is — consider deleting it.
            if False:
                loss_t = torch.tensor(0)
                loss_x = torch.tensor(0)
                #loss = loss_mmd_bc + loss_mmd_wc
                loss = loss_mmd_bc + loss_mmd_wc + loss_mmd_global
        loss.backward()
        self.optimizer.step()
        # -------------------------------------------------------------------------------------------------------------------- #
        batch_time.update(time.time() - end)
        losses_triplet.update(loss_t.item(), pids.size(0))
        losses_softmax.update(loss_x.item(), pids.size(0))
        if epoch > 24:
            # see NOTE(review) in the docstring about the 20/24 mismatch
            losses_mmd_bc.update(loss_mmd_bc.item(), pids.size(0))
            losses_mmd_wc.update(loss_mmd_wc.item(), pids.size(0))
            losses_mmd_global.update(loss_mmd_global.item(), pids.size(0))
        if (batch_idx + 1) % print_freq == 0:
            # estimate remaining time
            eta_seconds = batch_time.avg * (
                num_batches - (batch_idx + 1) + (max_epoch - (epoch + 1)) * num_batches
            )
            eta_str = str(datetime.timedelta(seconds=int(eta_seconds)))
            print(
                'Epoch: [{0}/{1}][{2}/{3}]\t'
                'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                'Loss_t {losses1.val:.4f} ({losses1.avg:.4f})\t'
                'Loss_x {losses2.val:.4f} ({losses2.avg:.4f})\t'
                'Loss_mmd_wc {losses3.val:.4f} ({losses3.avg:.4f})\t'
                'Loss_mmd_bc {losses4.val:.4f} ({losses4.avg:.4f})\t'
                'Loss_mmd_global {losses5.val:.4f} ({losses5.avg:.4f})\t'
                'eta {eta}'.format(
                    epoch + 1,
                    max_epoch,
                    batch_idx + 1,
                    num_batches,
                    batch_time=batch_time,
                    losses1=losses_triplet,
                    losses2=losses_softmax,
                    losses3=losses_mmd_wc,
                    losses4=losses_mmd_bc,
                    losses5=losses_mmd_global,
                    eta=eta_str
                )
            )
        if writer is not None:
            n_iter = epoch * num_batches + batch_idx
            writer.add_scalar('Train/Time', batch_time.avg, n_iter)
            writer.add_scalar('Train/Loss_triplet', losses_triplet.avg, n_iter)
            writer.add_scalar('Train/Loss_softmax', losses_softmax.avg, n_iter)
            writer.add_scalar('Train/Loss_mmd_bc', losses_mmd_bc.avg, n_iter)
            writer.add_scalar('Train/Loss_mmd_wc', losses_mmd_wc.avg, n_iter)
            writer.add_scalar('Train/Loss_mmd_global', losses_mmd_global.avg, n_iter)
            writer.add_scalar(
                'Train/Lr', self.optimizer.param_groups[0]['lr'], n_iter
            )
        end = time.time()

    if self.scheduler is not None:
        self.scheduler.step()

    # Debug-only visualization of within-class (bc) / between-class (wc)
    # distance distributions; disabled by the hard-coded flag below.
    print_distri = False

    if print_distri:
        instances = self.datamanager.train_loader.sampler.num_instances
        batch_size = self.datamanager.train_loader.batch_size
        feature_size = 2048  # features_t.shape[1] # 2048
        # assumes batch_size is a multiple of num_instances -- TODO confirm
        t = torch.reshape(features_t, (int(batch_size / instances), instances, feature_size))
        # and compute bc/wc euclidean distance
        bct = compute_distance_matrix(t[0], t[0])
        wct = compute_distance_matrix(t[0], t[1])
        for i in t[1:]:
            bct = torch.cat((bct, compute_distance_matrix(i, i)))
            # NOTE(review): nesting reconstructed from mangled source —
            # the inner loop is assumed to run per outer `i`; confirm.
            for j in t:
                if j is not i:
                    wct = torch.cat((wct, compute_distance_matrix(i, j)))
        s = torch.reshape(features, (int(batch_size / instances), instances, feature_size))
        bcs = compute_distance_matrix(s[0], s[0])
        wcs = compute_distance_matrix(s[0], s[1])
        for i in s[1:]:
            bcs = torch.cat((bcs, compute_distance_matrix(i, i)))
            for j in s:
                if j is not i:
                    wcs = torch.cat((wcs, compute_distance_matrix(i, j)))
        bcs = bcs.detach()
        wcs = wcs.detach()
        # drop (near-)zero self-distances before plotting
        b_c = [x.cpu().detach().item() for x in bcs.flatten() if x > 0.000001]
        w_c = [x.cpu().detach().item() for x in wcs.flatten() if x > 0.000001]
        data_bc = norm.rvs(b_c)
        sns.distplot(data_bc, bins='auto', fit=norm, kde=False, label='from the same class (within class)')
        data_wc = norm.rvs(w_c)
        sns.distplot(data_wc, bins='auto', fit=norm, kde=False, label='from different class (between class)')
        plt.xlabel('Euclidean distance')
        plt.ylabel('Frequence of apparition')
        plt.title('Source Domain')
        plt.legend()
        plt.show()
        b_ct = [x.cpu().detach().item() for x in bct.flatten() if x > 0.1]
        w_ct = [x.cpu().detach().item() for x in wct.flatten() if x > 0.1]
        data_bc = norm.rvs(b_ct)
        sns.distplot(data_bc, bins='auto', fit=norm, kde=False, label='from the same class (within class)')
        data_wc = norm.rvs(w_ct)
        sns.distplot(data_wc, bins='auto', fit=norm, kde=False, label='from different class (between class)')
        plt.xlabel('Euclidean distance')
        plt.ylabel('Frequence of apparition')
        plt.title('Target Domain')
        plt.legend()
        plt.show()
def train(self, epoch, max_epoch, trainloader, fixbase_epoch=0, open_layers=None, print_freq=10):
    """Plain cross-entropy training for one epoch.

    Args:
        epoch (int): zero-based index of the current epoch.
        max_epoch (int): total number of training epochs.
        trainloader: iterable of training batches.
        fixbase_epoch (int): during the first ``fixbase_epoch`` epochs only
            ``open_layers`` are trained; the rest of the model is frozen.
        open_layers (list or None): layers kept trainable in the fix-base phase.
        print_freq (int): print progress every ``print_freq`` batches.
    """
    losses = AverageMeter()
    accs = AverageMeter()
    batch_time = AverageMeter()
    data_time = AverageMeter()

    self.model.train()
    if (epoch + 1) <= fixbase_epoch and open_layers is not None:
        print('* Only train {} (epoch: {}/{})'.format(
            open_layers, epoch + 1, fixbase_epoch))
        open_specified_layers(self.model, open_layers)
    else:
        open_all_layers(self.model)

    # hoisted out of the loop: len(trainloader) is loop-invariant
    # (it was previously recomputed inside every print branch)
    num_batches = len(trainloader)
    end = time.time()
    for batch_idx, data in enumerate(trainloader):
        data_time.update(time.time() - end)

        imgs, pids = self._parse_data_for_train(data)
        if self.use_gpu:
            imgs = imgs.cuda()
            pids = pids.cuda()

        self.optimizer.zero_grad()
        outputs = self.model(imgs)
        loss = self._compute_loss(self.criterion, outputs, pids)
        loss.backward()
        self.optimizer.step()

        batch_time.update(time.time() - end)
        losses.update(loss.item(), pids.size(0))
        accs.update(metrics.accuracy(outputs, pids)[0].item())

        if (batch_idx + 1) % print_freq == 0:
            # estimate remaining time
            eta_seconds = batch_time.avg * (num_batches - (batch_idx + 1) + (max_epoch - (epoch + 1)) * num_batches)
            eta_str = str(datetime.timedelta(seconds=int(eta_seconds)))
            print('Epoch: [{0}/{1}][{2}/{3}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Acc {acc.val:.2f} ({acc.avg:.2f})\t'
                  'Lr {lr:.6f}\t'
                  'Eta {eta}'.format(
                      epoch + 1, max_epoch, batch_idx + 1, num_batches,
                      batch_time=batch_time,
                      data_time=data_time,
                      loss=losses,
                      acc=accs,
                      lr=self.optimizer.param_groups[0]['lr'],
                      eta=eta_str))

        end = time.time()

    if self.scheduler is not None:
        self.scheduler.step()
def train(self, epoch, max_epoch, trainloader, fixbase_epoch=0, open_layers=None, print_freq=10):
    """One training epoch for the pose-guided, part-based model.

    Combines a classification loss with a part-constraint loss; from
    ``self.reg_matching_score_epoch`` onward a matching (visibility
    verification) loss is added, and an attention loss is added when
    ``self.use_att_loss`` is set.

    Args:
        epoch (int): zero-based index of the current epoch.
        max_epoch (int): total number of training epochs.
        trainloader: iterable of batches providing (imgs, pids, pose_heatmaps).
        fixbase_epoch (int): warm-up epochs training only ``open_layers``.
        open_layers (list or None): layers kept trainable in the fix-base phase.
        print_freq (int): print progress every ``print_freq`` batches.
    """
    # matching loss is only enabled after a warm-up period
    use_matching_loss = epoch >= self.reg_matching_score_epoch

    losses = AverageMeter()
    accs = AverageMeter()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    att_losses = AverageMeter()
    part_losses = AverageMeter()
    matching_losses = AverageMeter()

    self.model.train()
    if (epoch + 1) <= fixbase_epoch and open_layers is not None:
        print('* Only train {} (epoch: {}/{})'.format(
            open_layers, epoch + 1, fixbase_epoch))
        open_specified_layers(self.model, open_layers)
    else:
        open_all_layers(self.model)

    # hoisted out of the loop: len(trainloader) is loop-invariant
    # (it was previously recomputed inside every print branch)
    num_batches = len(trainloader)
    end = time.time()
    for batch_idx, data in enumerate(trainloader):
        data_time.update(time.time() - end)

        imgs, pids, pose_heatmaps = self._parse_data_for_train(data)
        if self.use_gpu:
            imgs = imgs.cuda()
            pids = pids.cuda()
            pose_heatmaps = pose_heatmaps.cuda()

        self.optimizer.zero_grad()
        outputs, attmaps, part_score, v_g = self.model(imgs, pose_heatmaps)

        #classification loss
        loss_class = self._compute_loss(self.criterion, outputs, pids)
        # using for weighting each part with visibility
        # loss_class = self._compute_loss(self.criterion, outputs, pids, part_score.detach())
        loss_matching, loss_partconstr = self.part_c_criterion(
            v_g, pids, part_score, use_matching_loss)

        # add matching loss
        loss = loss_class + loss_partconstr
        # visibility verification loss
        if use_matching_loss:
            loss = loss + loss_matching
            matching_losses.update(loss_matching.item(), pids.size(0))
        if self.use_att_loss:
            loss_att = self.att_criterion(attmaps)
            loss = loss + loss_att
            att_losses.update(loss_att.item(), pids.size(0))

        loss.backward()
        self.optimizer.step()

        batch_time.update(time.time() - end)
        losses.update(loss.item(), pids.size(0))
        part_losses.update(loss_partconstr.item(), pids.size(0))
        accs.update(metrics.accuracy(outputs, pids)[0].item())

        if (batch_idx + 1) % print_freq == 0:
            # estimate remaining time
            eta_seconds = batch_time.avg * (num_batches - (batch_idx + 1) + (max_epoch - (epoch + 1)) * num_batches)
            eta_str = str(datetime.timedelta(seconds=int(eta_seconds)))
            print('Epoch: [{0}/{1}][{2}/{3}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'part_Loss {loss_part.val:.4f} ({loss_part.avg:.4f})\t'
                  'Acc {acc.val:.2f} ({acc.avg:.2f})\t'
                  'Lr {lr:.6f}\t'
                  'Eta {eta}'.format(
                      epoch + 1, max_epoch, batch_idx + 1, num_batches,
                      batch_time=batch_time,
                      data_time=data_time,
                      loss=losses,
                      loss_part=part_losses,
                      acc=accs,
                      lr=self.optimizer.param_groups[0]['lr'],
                      eta=eta_str), end='\t')
            if self.use_att_loss:
                print(
                    'attLoss {attloss.val:.4f} ({attloss.avg:.4f})'.format(
                        attloss=att_losses), end='\t')
            if use_matching_loss:
                print(
                    'matchLoss {match_loss.val:.4f} ({match_loss.avg:.4f})'
                    .format(match_loss=matching_losses), end='\t')
            print('\n')

        end = time.time()

    if self.scheduler is not None:
        self.scheduler.step()
def train(
        self,
        epoch,
        max_epoch,
        writer,
        print_freq=1,
        fixbase_epoch=0,
        open_layers=None,
):
    """Train for one epoch on joint source/target streams (domain adaptation).

    Draws batches in lockstep from the labelled source loader and the target
    loader, and minimizes: triplet + softmax losses on the source, averaged
    part-head softmax losses, VAE reconstruction losses on both streams and
    MMD alignment terms between source and target features.

    Cleanups vs. the previous revision (behavior unchanged):
    dead ``if False:`` branch removed, unused ``kl_loss`` assignments removed,
    never-updated ``losses_mmd_*`` meters removed, index loop over the part
    heads replaced by direct iteration.

    Args:
        epoch (int): zero-based current epoch.
        max_epoch (int): total number of epochs, used for the ETA estimate.
        writer: TensorBoard ``SummaryWriter`` or None.
        print_freq (int): console logging period in batches.
        fixbase_epoch (int): epochs during which only ``open_layers`` train.
        open_layers: layer name(s) kept trainable during ``fixbase_epoch``.
    """
    losses_triplet = AverageMeter()
    losses_softmax = AverageMeter()
    losses_recons_s = AverageMeter()
    losses_recons_t = AverageMeter()
    losses_local = AverageMeter()
    batch_time = AverageMeter()
    data_time = AverageMeter()

    self.model.train()
    if (epoch + 1) <= fixbase_epoch and open_layers is not None:
        print('* Only train {} (epoch: {}/{})'.format(
            open_layers, epoch + 1, fixbase_epoch))
        open_specified_layers(self.model, open_layers)
    else:
        open_all_layers(self.model)

    num_batches = len(self.train_loader)
    end = time.time()
    weight_r = self.weight_r

    for batch_idx, (data, data_t) in enumerate(
            zip(self.train_loader, self.train_loader_t)):
        data_time.update(time.time() - end)

        # Source batch (labelled).
        imgs, pids = self._parse_data_for_train(data)
        if self.use_gpu:
            imgs = imgs.cuda()
            pids = pids.cuda()
        # Target batch; its labels are never used for supervision.
        imgs_t, pids_t = self._parse_data_for_train(data_t)
        if self.use_gpu:
            imgs_t = imgs_t.cuda()

        self.optimizer.zero_grad()

        # Source forward pass on an augmented ("noisy") view.
        noisy_imgs = self.random(imgs)
        outputs, part_outs, features, recons, z, mean, var, local_feat = self.model(
            noisy_imgs)

        # Average part-level classification loss over all part heads.
        parts_loss = 0
        for out in part_outs:
            parts_loss += self._compute_loss(self.criterion_x, out, pids)
        parts_loss = parts_loss / len(part_outs)

        # Target forward pass (with its own augmentation).
        imgs_t = self.random2(imgs_t)
        outputs_t, parts_out_t, features_t, recons_t, z_t, mean_t, var_t, local_feat_t = self.model(
            imgs_t)

        loss_t = self._compute_loss(self.criterion_t, features, pids)
        loss_x = self._compute_loss(self.criterion_x, outputs, pids)
        # VAE reconstruction losses for source (r1) and target (r2).
        loss_r1 = self.loss_vae(imgs, recons, mean, var)
        loss_r2 = self.loss_vae(imgs_t, recons_t, mean_t, var_t)

        # MMD between local-feature correlation matrices; the source side is
        # detached so this term only shapes the target representation.
        dist_mat_s = self.get_local_correl(local_feat).detach()
        dist_mat_t = self.get_local_correl(local_feat_t)
        local_loss = self.criterion_mmd.mmd_rbf_noaccelerate(
            dist_mat_s, dist_mat_t)

        # Global MMD terms between source and target embeddings.
        loss_mmd_wc, loss_mmd_bc, loss_mmd_global = self._compute_loss(
            self.criterion_mmd, features, features_t)

        # Warm-up objective: target reconstruction disabled (0 * loss_r2).
        loss = (loss_t + loss_x + weight_r * loss_r1 + 0 * loss_r2
                + loss_mmd_wc + loss_mmd_bc + loss_mmd_global + parts_loss)
        if epoch > 10:
            # After warm-up: enable target reconstruction, drop part losses.
            loss = (loss_t + loss_x + weight_r * loss_r1 + weight_r * loss_r2
                    + loss_mmd_wc + loss_mmd_bc + loss_mmd_global)

        loss.backward()
        self.optimizer.step()

        batch_time.update(time.time() - end)
        losses_triplet.update(loss_t.item(), pids.size(0))
        losses_softmax.update(loss_x.item(), pids.size(0))
        losses_recons_s.update(loss_r1.item(), pids.size(0))
        losses_recons_t.update(loss_r2.item(), pids.size(0))
        losses_local.update(local_loss.item(), pids.size(0))

        if (batch_idx + 1) % print_freq == 0:
            # Estimate remaining time for this epoch plus future epochs.
            eta_seconds = batch_time.avg * (num_batches - (batch_idx + 1) +
                                            (max_epoch - (epoch + 1)) * num_batches)
            eta_str = str(datetime.timedelta(seconds=int(eta_seconds)))
            print('Epoch: [{0}/{1}][{2}/{3}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Loss_t {losses1.val:.4f} ({losses1.avg:.4f})\t'
                  'Loss_x {losses2.val:.4f} ({losses2.avg:.4f})\t'
                  'Loss_reconsS {losses4.val:.4f} ({losses4.avg:.4f})\t'
                  'Loss_reconsT {losses5.val:.4f} ({losses5.avg:.4f})\t'
                  'Loss_local {losses6.val:.4f} ({losses6.avg:.4f})\t'
                  'eta {eta}'.format(epoch + 1, max_epoch, batch_idx + 1,
                                     num_batches,
                                     batch_time=batch_time,
                                     losses1=losses_triplet,
                                     losses2=losses_softmax,
                                     losses4=losses_recons_s,
                                     losses5=losses_recons_t,
                                     losses6=losses_local,
                                     eta=eta_str))

        if writer is not None:
            n_iter = epoch * num_batches + batch_idx
            writer.add_scalar('Train/Time', batch_time.avg, n_iter)
            writer.add_scalar('Train/Loss_triplet', losses_triplet.avg, n_iter)
            writer.add_scalar('Train/Loss_softmax', losses_softmax.avg, n_iter)
            writer.add_scalar('Train/Loss_recons_s', losses_recons_s.avg, n_iter)
            writer.add_scalar('Train/Loss_recons_t', losses_recons_t.avg, n_iter)

        end = time.time()

    if self.scheduler is not None:
        self.scheduler.step()

    # Disabled debug visualisation: plots within/between-class distance
    # distributions for source and target local features. Kept for reference.
    print_distri = False
    if print_distri:
        print("Printing distribution")
        instances = self.datamanager.train_loader.sampler.num_instances
        batch_size = self.datamanager.train_loader.batch_size
        feature_size = 1024  # features_t.shape[1] # 2048
        # NOTE(review): uses local_feat/local_feat_t from the *last* batch of
        # the loop above — confirm this is the intended behaviour.
        local_feat_t = local_feat_t.reshape(local_feat_t.size(0), -1)
        t = torch.reshape(
            local_feat_t,
            (int(batch_size / instances), instances, feature_size))
        # Between-class (bct) / within-class (wct) euclidean distances.
        bct = compute_distance_matrix(t[0], t[0])
        wct = compute_distance_matrix(t[0], t[1])
        for i in t[1:]:
            bct = torch.cat((bct, compute_distance_matrix(i, i)))
            for j in t:
                # NOTE(review): identity test on tensor slices — indexing
                # creates new objects, so this may always be True; verify.
                if j is not i:
                    wct = torch.cat((wct, compute_distance_matrix(i, j)))
        s = torch.reshape(
            local_feat,
            (int(batch_size / instances), instances, feature_size))
        bcs = compute_distance_matrix(s[0], s[0])
        wcs = compute_distance_matrix(s[0], s[1])
        for i in s[1:]:
            bcs = torch.cat((bcs, compute_distance_matrix(i, i)))
            for j in s:
                if j is not i:
                    wcs = torch.cat((wcs, compute_distance_matrix(i, j)))
        bcs = bcs.detach()
        wcs = wcs.detach()
        b_c = [
            x.cpu().detach().item() for x in bcs.flatten() if x > 0.000001
        ]
        w_c = [
            x.cpu().detach().item() for x in wcs.flatten() if x > 0.000001
        ]
        data_bc = norm.rvs(b_c)
        sns.distplot(data_bc, bins='auto', fit=norm, kde=False,
                     label='from the same class (within class)')
        data_wc = norm.rvs(w_c)
        sns.distplot(data_wc, bins='auto', fit=norm, kde=False,
                     label='from different class (between class)')
        plt.xlabel('Euclidean distance')
        plt.ylabel('Frequence of Occurance')
        plt.title('Source Domain')
        plt.legend()
        plt.savefig(
            "/export/livia/home/vision/mkiran/work/Person_Reid/Video_Person/Domain_Adapt/D-MMD/figs/Non_Occluded_distribution.png"
        )
        plt.clf()
        b_ct = [x.cpu().detach().item() for x in bct.flatten() if x > 0.1]
        w_ct = [x.cpu().detach().item() for x in wct.flatten() if x > 0.1]
        data_bc = norm.rvs(b_ct)
        sns.distplot(data_bc, bins='auto', fit=norm, kde=False,
                     label='from the same class (within class)')
        data_wc = norm.rvs(w_ct)
        sns.distplot(data_wc, bins='auto', fit=norm, kde=False,
                     label='from different class (between class)')
        plt.xlabel('Euclidean distance')
        plt.ylabel('Frequence of apparition')
        plt.title('Non-Occluded Data Domain')
        plt.legend()
        plt.savefig(
            "/export/livia/home/vision/mkiran/work/Person_Reid/Video_Person/Domain_Adapt/D-MMD/figs/Occluded_distribution.png"
        )
        plt.clf()
def train(self, epoch, max_epoch, trainloader, fixbase_epoch=0, open_layers=None, print_freq=10):
    """Train for one epoch while tracking top-1..top-5 accuracy.

    Every batch is logged to TensorBoard (``self.writer``) and comet.ml
    (``self.experiment``); console progress is printed every ``print_freq``
    batches. At the end of the epoch the LR scheduler is stepped once
    (``ReduceLROnPlateau`` receives the last batch loss as its metric).
    """
    losses = AverageMeter()
    # One running meter per top-k accuracy, k = 1..5.
    top_meters = [AverageMeter() for _ in range(5)]
    batch_time = AverageMeter()
    data_time = AverageMeter()

    self.model.train()
    # Two-stepped transfer learning: freeze base layers for the first
    # `fixbase_epoch` epochs when `open_layers` is given.
    if (epoch + 1) <= fixbase_epoch and open_layers is not None:
        print('* Only train {} (epoch: {}/{})'.format(
            open_layers, epoch + 1, fixbase_epoch))
        open_specified_layers(self.model, open_layers)
    else:
        open_all_layers(self.model)

    end = time.time()
    for batch_idx, data in enumerate(trainloader):
        data_time.update(time.time() - end)
        num_batches = len(trainloader)
        # Monotonic step index across epochs for the logging backends.
        global_step = num_batches * epoch + batch_idx

        imgs, pids = self._parse_data_for_train(data)
        if self.use_gpu:
            imgs = imgs.cuda()
            pids = pids.cuda()

        self.optimizer.zero_grad()
        outputs = self.model(imgs)
        loss = self._compute_loss(self.criterion, outputs, pids)
        loss.backward()
        self.optimizer.step()

        batch_time.update(time.time() - end)
        losses.update(loss.item(), pids.size(0))
        accs = metrics.accuracy(outputs, pids, topk=(1, 2, 3, 4, 5))
        for i, meter in enumerate(top_meters):
            meter.update(accs[i].item())

        # write to Tensorboard & comet.ml (every batch)
        accs_dict = {
            'train-accs-top-' + str(i + 1): float(r)
            for i, r in enumerate(accs)
        }
        for i, r in enumerate(accs):
            self.writer.add_scalars('optim/train-accs',
                                    {'top-' + str(i + 1): float(r)},
                                    global_step)
        self.experiment.log_metrics(accs_dict, step=global_step)
        self.writer.add_scalar(
            'optim/loss', losses.val,
            global_step)  # loss, loss.item() or losses.val ??
        # self.writer.add_scalar('optim/loss-avg',losses.avg,global_step)
        self.experiment.log_metric('optim/loss', losses.val, step=global_step)
        self.writer.add_scalar('optim/lr',
                               self.optimizer.param_groups[0]['lr'],
                               global_step)
        self.experiment.log_metric('optim/lr',
                                   self.optimizer.param_groups[0]['lr'],
                                   step=global_step)

        if (batch_idx + 1) % print_freq == 0:
            # estimate remaining time
            num_batches = len(trainloader)
            eta_seconds = batch_time.avg * (num_batches - (batch_idx + 1) +
                                            (max_epoch - (epoch + 1)) * num_batches)
            eta_str = str(datetime.timedelta(seconds=int(eta_seconds)))
            print('Epoch: [{0}/{1}][{2}/{3}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Top-1 {r1.val:.2f} ({r1.avg:.2f})\t'
                  'Top-2 {r2.val:.2f} ({r2.avg:.2f})\t'
                  'Top-3 {r3.val:.2f} ({r3.avg:.2f})\t'
                  'Top-4 {r4.val:.2f} ({r4.avg:.2f})\t'
                  'Top-5 {r5.val:.2f} ({r5.avg:.2f})\t'
                  'Lr {lr:.6f}\t'
                  'Eta {eta}'.format(
                      epoch + 1, max_epoch, batch_idx + 1, len(trainloader),
                      batch_time=batch_time,
                      data_time=data_time,
                      loss=losses,
                      r1=top_meters[0],
                      r2=top_meters[1],
                      r3=top_meters[2],
                      r4=top_meters[3],
                      r5=top_meters[4],
                      lr=self.optimizer.param_groups[0]['lr'],
                      eta=eta_str))
            self.writer.add_scalar('eta', eta_seconds, global_step)
            self.experiment.log_metric('eta', eta_seconds, step=global_step)

        end = time.time()

    # ReduceLROnPlateau needs the monitored metric; other schedulers do not.
    if isinstance(self.scheduler,
                  torch.optim.lr_scheduler.ReduceLROnPlateau):
        self.scheduler.step(losses.val)
    elif self.scheduler is not None:
        self.scheduler.step()
def _evaluate(self, epoch, dataset_name='', queryloader=None, galleryloader=None,
              dist_metric='euclidean', normalize_feature=False, visrank=False,
              visrankactiv=False, visrank_topk=10, save_dir='',
              use_metric_cuhk03=False, ranks=[1, 5, 10, 20], rerank=False,
              visrankactivthr=False, maskthr=0.7, visdrop=False,
              visdroptype='random'):
    """Extract query/gallery features, rank, and report CMC/mAP.

    Besides features, per-image activation maps and drop masks are collected
    for the optional visualisation modes (``visrank*``, ``visdrop``).
    Returns the rank-1 CMC score.

    NOTE(review): `ranks` uses a mutable default list; it is only iterated
    here, but a tuple default would be safer.
    NOTE(review): the model is not switched to eval mode in this method —
    presumably the caller does it; verify.
    """
    batch_time = AverageMeter()

    print('Extracting features from query set ...')
    qf, qa, q_pids, q_camids, qm = [], [], [], [], [
    ]  # query features, query activations, query person IDs, query camera IDs and image drop masks
    for _, data in enumerate(queryloader):
        imgs, pids, camids = self._parse_data_for_eval(data)
        if self.use_gpu:
            imgs = imgs.cuda()
        # Timer starts after data loading so only model time is measured.
        end = time.time()
        features = self._extract_features(imgs)
        activations = self._extract_activations(imgs)
        dropmask = self._extract_drop_masks(imgs, visdrop, visdroptype)
        batch_time.update(time.time() - end)
        features = features.data.cpu()
        qf.append(features)
        qa.append(torch.Tensor(activations))
        qm.append(torch.Tensor(dropmask))
        q_pids.extend(pids)
        q_camids.extend(camids)
    qf = torch.cat(qf, 0)
    qm = torch.cat(qm, 0)
    qa = torch.cat(qa, 0)
    q_pids = np.asarray(q_pids)
    q_camids = np.asarray(q_camids)
    print('Done, obtained {}-by-{} matrix'.format(qf.size(0), qf.size(1)))

    print('Extracting features from gallery set ...')
    gf, ga, g_pids, g_camids, gm = [], [], [], [], [
    ]  # gallery features, gallery activations, gallery person IDs, gallery camera IDs and image drop masks
    end = time.time()
    for _, data in enumerate(galleryloader):
        imgs, pids, camids = self._parse_data_for_eval(data)
        if self.use_gpu:
            imgs = imgs.cuda()
        end = time.time()
        features = self._extract_features(imgs)
        activations = self._extract_activations(imgs)
        dropmask = self._extract_drop_masks(imgs, visdrop, visdroptype)
        batch_time.update(time.time() - end)
        features = features.data.cpu()
        gf.append(features)
        ga.append(torch.Tensor(activations))
        gm.append(torch.Tensor(dropmask))
        g_pids.extend(pids)
        g_camids.extend(camids)
    gf = torch.cat(gf, 0)
    gm = torch.cat(gm, 0)
    ga = torch.cat(ga, 0)
    g_pids = np.asarray(g_pids)
    g_camids = np.asarray(g_camids)
    print('Done, obtained {}-by-{} matrix'.format(gf.size(0), gf.size(1)))

    print('Speed: {:.4f} sec/batch'.format(batch_time.avg))

    if normalize_feature:
        print('Normalzing features with L2 norm ...')
        qf = F.normalize(qf, p=2, dim=1)
        gf = F.normalize(gf, p=2, dim=1)

    print(
        'Computing distance matrix with metric={} ...'.format(dist_metric))
    distmat = metrics.compute_distance_matrix(qf, gf, dist_metric)
    distmat = distmat.numpy()

    # always show results without re-ranking first
    print('Computing CMC and mAP ...')
    cmc, mAP = metrics.evaluate_rank(distmat,
                                     q_pids,
                                     g_pids,
                                     q_camids,
                                     g_camids,
                                     use_metric_cuhk03=use_metric_cuhk03)
    print('** Results **')
    print('mAP: {:.1%}'.format(mAP))
    print('CMC curve')
    for r in ranks:
        print('Rank-{:<3}: {:.1%}'.format(r, cmc[r - 1]))

    if rerank:
        print('Applying person re-ranking ...')
        distmat_qq = metrics.compute_distance_matrix(qf, qf, dist_metric)
        distmat_gg = metrics.compute_distance_matrix(gf, gf, dist_metric)
        distmat = re_ranking(distmat, distmat_qq, distmat_gg)
        print('Computing CMC and mAP ...')
        cmc, mAP = metrics.evaluate_rank(
            distmat,
            q_pids,
            g_pids,
            q_camids,
            g_camids,
            use_metric_cuhk03=use_metric_cuhk03)
        print('** Results with Re-Ranking**')
        print('mAP: {:.1%}'.format(mAP))
        print('CMC curve')
        for r in ranks:
            print('Rank-{:<3}: {:.1%}'.format(r, cmc[r - 1]))

    # Optional visualisations of the ranked lists.
    if visrank:
        visualize_ranked_results(
            distmat,
            self.datamanager.return_testdataset_by_name(dataset_name),
            self.datamanager.data_type,
            width=self.datamanager.width,
            height=self.datamanager.height,
            save_dir=osp.join(save_dir, 'visrank_' + dataset_name),
            topk=visrank_topk)
    if visrankactiv:
        visualize_ranked_activation_results(
            distmat,
            qa,
            ga,
            self.datamanager.return_testdataset_by_name(dataset_name),
            self.datamanager.data_type,
            width=self.datamanager.width,
            height=self.datamanager.height,
            save_dir=osp.join(save_dir, 'visrankactiv_' + dataset_name),
            topk=visrank_topk)
    if visrankactivthr:
        visualize_ranked_threshold_activation_results(
            distmat,
            qa,
            ga,
            self.datamanager.return_testdataset_by_name(dataset_name),
            self.datamanager.data_type,
            width=self.datamanager.width,
            height=self.datamanager.height,
            save_dir=osp.join(save_dir, 'visrankactivthr_' + dataset_name),
            topk=visrank_topk,
            threshold=maskthr)
    if visdrop:
        visualize_ranked_mask_activation_results(
            distmat,
            qa,
            ga,
            qm,
            gm,
            self.datamanager.return_testdataset_by_name(dataset_name),
            self.datamanager.data_type,
            width=self.datamanager.width,
            height=self.datamanager.height,
            save_dir=osp.join(
                save_dir,
                'visdrop_{}_{}'.format(visdroptype, dataset_name)),
            topk=visrank_topk)

    return cmc[0]
def train(self, epoch, max_epoch, trainloader, fixbase_epoch=0, open_layers=None, print_freq=10):
    """Run a single training epoch with combined triplet + softmax losses.

    Per-batch metrics are streamed to comet.ml; console progress is printed
    every ``print_freq`` batches, and the LR scheduler is stepped once at
    the end of the epoch (``ReduceLROnPlateau`` gets the last batch loss).
    """
    trip_meter = AverageMeter()
    softmax_meter = AverageMeter()
    total_meter = AverageMeter()
    acc_meter = AverageMeter()
    time_meter = AverageMeter()
    load_meter = AverageMeter()

    self.model.train()

    # During the warm-up epochs only the requested layers are trainable.
    freeze_base = open_layers is not None and (epoch + 1) <= fixbase_epoch
    if freeze_base:
        print('* Only train {} (epoch: {}/{})'.format(open_layers, epoch + 1, fixbase_epoch))
        open_specified_layers(self.model, open_layers)
    else:
        open_all_layers(self.model)

    num_batches = len(trainloader)
    tick = time.time()
    for step, batch in enumerate(trainloader):
        load_meter.update(time.time() - tick)
        global_step = num_batches * epoch + step

        imgs, pids = self._parse_data_for_train(batch)
        if self.use_gpu:
            imgs, pids = imgs.cuda(), pids.cuda()

        self.optimizer.zero_grad()
        outputs, features = self.model(imgs)
        loss_t = self._compute_loss(self.criterion_t, features, pids)
        loss_x = self._compute_loss(self.criterion_x, outputs, pids)
        loss = self.weight_t * loss_t + self.weight_x * loss_x
        loss.backward()
        self.optimizer.step()

        time_meter.update(time.time() - tick)
        n = pids.size(0)
        trip_meter.update(loss_t.item(), n)
        softmax_meter.update(loss_x.item(), n)
        total_meter.update(loss.item(), n)
        acc_meter.update(metrics.accuracy(outputs, pids, topk=(1,))[0].item())

        # Stream the per-batch values to comet.ml.
        self.experiment.log_metric('optim/accs', acc_meter.val, step=global_step)
        self.experiment.log_metric('optim/loss', total_meter.val, step=global_step)
        self.experiment.log_metric('optim/loss_triplet', trip_meter.val, step=global_step)
        self.experiment.log_metric('optim/loss_softmax', softmax_meter.val, step=global_step)
        self.experiment.log_metric('optim/lr', self.optimizer.param_groups[0]['lr'], step=global_step)

        if (step + 1) % print_freq == 0:
            # Remaining batches in this epoch plus all future epochs.
            remaining = num_batches - (step + 1) + (max_epoch - (epoch + 1)) * num_batches
            eta_seconds = time_meter.avg * remaining
            eta_str = str(datetime.timedelta(seconds=int(eta_seconds)))
            print('Epoch: [{0}/{1}][{2}/{3}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Loss_t {loss_t.val:.4f} ({loss_t.avg:.4f})\t'
                  'Loss_x {loss_x.val:.4f} ({loss_x.avg:.4f})\t'
                  'Acc {acc.val:.2f} ({acc.avg:.2f})\t'
                  'Lr {lr:.6f}\t'
                  'Eta {eta}'.format(
                      epoch + 1, max_epoch, step + 1, num_batches,
                      batch_time=time_meter,
                      data_time=load_meter,
                      loss=total_meter,
                      loss_t=trip_meter,
                      loss_x=softmax_meter,
                      acc=acc_meter,
                      lr=self.optimizer.param_groups[0]['lr'],
                      eta=eta_str
                  )
            )
            self.writer.add_scalar('eta', eta_seconds, global_step)
            self.experiment.log_metric('eta', eta_seconds, step=global_step)

        tick = time.time()

    scheduler = self.scheduler
    if isinstance(scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau):
        scheduler.step(total_meter.val)
    elif scheduler is not None:
        scheduler.step()
def _evaluate(self, arch, epoch, dataset_name='', queryloader=None,
              galleryloader=None, dist_metric='euclidean',
              normalize_feature=False, visrank=False, visrank_topk=20,
              save_dir='', use_metric_cuhk03=False, ranks=[1, 5, 10, 20],
              rerank=False, viscam=False, viscam_num=10, viscam_only=False):
    """Evaluate the model; gallery features are merged by ``self.combine_fn``.

    Unless ``viscam_only`` is set, extracts query/gallery features, combines
    gallery embeddings, computes CMC/mAP (optionally after re-ranking) and
    logs the results. Class-activation maps are rendered when ``viscam`` is
    on; ``viscam_only`` aborts after the visualisation. Returns rank-1 CMC.

    NOTE(review): `ranks` is a mutable default list (iterated only).
    """
    with self.experiment.test():
        if not viscam_only:
            batch_time = AverageMeter()
            combine_time = AverageMeter()
            self.model.eval()

            print('Extracting features from query set ...')
            qf, q_pids, q_camids = [], [], []  # query features, query person IDs and query camera IDs
            for batch_idx, data in enumerate(queryloader):
                imgs, pids, camids = self._parse_data_for_eval(data)
                if self.use_gpu:
                    imgs = imgs.cuda()
                # Timer starts after data loading: only model time counts.
                end = time.time()
                features = self._extract_features(imgs)
                batch_time.update(time.time() - end, len(pids), True)
                features = features.data.cpu()
                qf.append(features)
                q_pids.extend(pids)
                q_camids.extend(camids)
            qf = torch.cat(qf, 0)
            q_pids = np.asarray(q_pids)
            q_camids = np.asarray(q_camids)
            print('Done, obtained {}-by-{} matrix'.format(qf.size(0), qf.size(1)))

            print('Extracting features from gallery set ...')
            gf, g_pids, g_camids = [], [], []  # gallery features, gallery person IDs and gallery camera IDs
            end = time.time()
            for batch_idx, data in enumerate(galleryloader):
                imgs, pids, camids = self._parse_data_for_eval(data)
                if self.use_gpu:
                    imgs = imgs.cuda()
                end = time.time()
                features = self._extract_features(imgs)
                batch_time.update(time.time() - end, len(pids), True)
                features = features.data.cpu()
                gf.append(features)
                g_pids.extend(pids)
                g_camids.extend(camids)
            gf = torch.cat(gf, 0)
            g_pids = np.asarray(g_pids)
            g_camids = np.asarray(g_camids)

            # Merge gallery embeddings (e.g. per identity/camera) and time it.
            end = time.time()
            num_images = len(g_pids)
            self.combine_fn.train()
            gf, g_pids = self.combine_fn(gf, g_pids, g_camids)
            if self.save_embed:
                assert osp.isdir(self.save_embed)
                path = osp.realpath(self.save_embed)
                np.save(path + '/gf-' + self.combine_method + '.npy', gf)
                np.save(path + '/g_pids-' + self.combine_method + '.npy', g_pids)
            combine_time.update(time.time() - end, num_images, True)
            # NOTE(review): the next statement is a dead expression with no
            # effect — likely a leftover; consider removing it.
            time.time() - end
            gf = torch.tensor(gf, dtype=torch.float)
            print('Done, obtained {}-by-{} matrix'.format(gf.size(0), gf.size(1)))
            print('Speed: {:.4f} sec/image'.format(batch_time.avg + combine_time.avg))

            if normalize_feature:
                print('Normalzing features with L2 norm ...')
                qf = F.normalize(qf, p=2, dim=1)
                gf = F.normalize(gf, p=2, dim=1)

            print('Computing distance matrix with metric={} ...'.format(dist_metric))
            distmat = metrics.compute_distance_matrix(qf, gf, dist_metric)
            distmat = distmat.numpy()

            if rerank:
                print('Applying person re-ranking ...')
                distmat_qq = metrics.compute_distance_matrix(qf, qf, dist_metric)
                distmat_gg = metrics.compute_distance_matrix(gf, gf, dist_metric)
                distmat = re_ranking(distmat, distmat_qq, distmat_gg)

            print('Computing CMC and mAP ...')
            cmc, mAP = metrics.evaluate_rank(
                distmat,
                q_pids,
                g_pids,
                q_camids,
                g_camids,
                use_metric_cuhk03=use_metric_cuhk03
            )

            print('** Results **')
            print('mAP: {:.1%}'.format(mAP))
            print('CMC curve')
            for r in ranks:
                print('Rank-{:<3}: {:.1%}'.format(r, cmc[r-1]))

            # write to Tensorboard and comet.ml
            if not self.test_only:
                rs = {'eval-rank-{:<3}'.format(r): cmc[r-1] for r in ranks}
                self.writer.add_scalars('eval/ranks', rs, epoch)
                self.experiment.log_metrics(rs, step=epoch)
                self.writer.add_scalar('eval/mAP', mAP, epoch)
                self.experiment.log_metric('eval-mAP', mAP, step=epoch)
                print('Results written to tensorboard and comet.ml.')

            if visrank:
                visualize_ranked_results(
                    distmat,
                    self.datamanager.return_testdataset_by_name(dataset_name),
                    save_dir=osp.join(save_dir, 'visrank-'+str(epoch+1), dataset_name),
                    topk=visrank_topk
                )

        # CAM visualisation; the final conv layer name depends on the arch.
        if viscam:
            if arch == 'osnet_x1_0' or arch == 'osnet_custom':
                # print(self.model)
                visualize_cam(
                    model=self.model,
                    finalconv='conv5',  # for OSNet
                    dataset=self.datamanager.return_testdataset_by_name(dataset_name),
                    save_dir=osp.join(save_dir, 'viscam-'+str(epoch+1), dataset_name),
                    num=viscam_num
                )
            elif arch == 'resnext50_32x4d':
                # print(self.model)
                visualize_cam(
                    model=self.model,
                    finalconv='layer4',  # for resnext50
                    dataset=self.datamanager.return_testdataset_by_name(dataset_name),
                    save_dir=osp.join(save_dir, 'viscam-'+str(epoch+1), dataset_name),
                    num=viscam_num
                )
        if viscam_only:
            raise RuntimeError('Stop exec because `viscam_only` is set to true.')
        return cmc[0]
def train(
    self, epoch, max_epoch, writer, print_freq=10, fixbase_epoch=0,
    open_layers=None
):
    """Train one epoch with random-erasing occlusion augmentation.

    Each image may be occluded in-place by ``RandomErasing``; the per-image
    occlusion flag becomes the target of three auxiliary binary heads
    (``bin_out1..3``), trained jointly with the MGN re-id loss and an MSE
    reconstruction loss against the clean (un-occluded) image.

    Bug fix: ``binary_labels`` was previously moved to CUDA unconditionally,
    which crashed CPU-only runs; it is now guarded by ``self.use_gpu`` like
    every other tensor here. The tensorboard block that was dead (the code
    shadowed ``writer`` with ``None`` right before it, and it referenced a
    never-updated meter) has been removed.
    """
    losses_x = AverageMeter()       # total combined loss
    losses_recons = AverageMeter()  # BCE loss of the first occlusion head
    accs = AverageMeter()
    batch_time = AverageMeter()
    data_time = AverageMeter()

    self.model.train()
    # This engine always trains all layers; fixbase_epoch/open_layers are
    # accepted only for interface compatibility.
    open_all_layers(self.model)

    num_batches = len(self.train_loader)
    end = time.time()
    for batch_idx, data in enumerate(self.train_loader):
        data_time.update(time.time() - end)

        imgs, pids = self._parse_data_for_train(data)
        imgs_clean = imgs.clone()  # un-occluded copy: target of the MSE loss
        if self.use_gpu:
            imgs = imgs.cuda()
            imgs_clean = imgs_clean.cuda()
            pids = pids.cuda()

        # Apply random erasing in-place; labelss[i] flags whether image i was
        # actually erased. The minimum erased area (sl) grows from 7% to 10%
        # once epoch >= 15.
        labelss = []
        if epoch >= 0:
            sl = 0.07 if epoch < 15 else 0.1
            randmt = RandomErasing(probability=0.5, sl=sl, sh=0.3)
            for i, img in enumerate(imgs):
                imgs[i], p = randmt(img)
                labelss.append(p)
        binary_labels = torch.tensor(np.asarray(labelss))
        if self.use_gpu:
            # Fix: only move to CUDA when a GPU is actually in use.
            binary_labels = binary_labels.cuda()

        self.optimizer.zero_grad()
        outputs, outputs2, recons, bin_out1, bin_out2, bin_out3 = self.model(imgs)

        loss_mse = self.criterion_mse(recons, imgs_clean)
        loss = self.mgn_loss(outputs, pids)
        # Three auxiliary occlusion-detection heads share the same targets.
        occ_targets = binary_labels.float()
        occ_loss1 = self.BCE_criterion(bin_out1.squeeze(1), occ_targets)
        occ_loss2 = self.BCE_criterion(bin_out2.squeeze(1), occ_targets)
        occ_loss3 = self.BCE_criterion(bin_out3.squeeze(1), occ_targets)
        loss = loss + .05 * loss_mse + 0.1 * occ_loss1 + 0.1 * occ_loss2 + 0.1 * occ_loss3

        loss.backward()
        self.optimizer.step()

        batch_time.update(time.time() - end)
        losses_x.update(loss.item(), pids.size(0))
        losses_recons.update(occ_loss1.item(), binary_labels.size(0))
        accs.update(metrics.accuracy(outputs, pids)[0].item())

        if (batch_idx + 1) % print_freq == 0:
            # Estimate remaining time over this epoch and all future epochs.
            eta_seconds = batch_time.avg * (
                num_batches - (batch_idx + 1) +
                (max_epoch - (epoch + 1)) * num_batches
            )
            eta_str = str(datetime.timedelta(seconds=int(eta_seconds)))
            print(
                'Epoch: [{0}/{1}][{2}/{3}]\t'
                'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                'Loss_x {loss_x.val:.4f} ({loss_x.avg:.4f})\t'
                'Loss_Occlusion {loss_r.val:.4f} ({loss_r.avg:.4f})\t'
                'Acc {acc.val:.2f} ({acc.avg:.2f})\t'
                'Lr {lr:.6f}\t'
                'eta {eta}'.format(
                    epoch + 1,
                    max_epoch,
                    batch_idx + 1,
                    num_batches,
                    batch_time=batch_time,
                    data_time=data_time,
                    loss_x=losses_x,
                    loss_r=losses_recons,
                    acc=accs,
                    lr=self.optimizer.param_groups[0]['lr'],
                    eta=eta_str
                )
            )

        end = time.time()

    if self.scheduler is not None:
        self.scheduler.step()
def run(self, trial=None, save_dir='log', tb_writer=None, max_epoch=0,
        start_epoch=0, print_freq=10, fixbase_epoch=0, open_layers=None,
        start_eval=0, eval_freq=-1, dist_metric='euclidean',
        normalize_feature=False, visrank=False, visrank_topk=10,
        use_metric_cuhk03=False, ranks=(1, 5, 10, 20), lr_finder=None,
        perf_monitor=None, stop_callback=None, initial_seed=5, **kwargs):
    r"""A unified pipeline for training and evaluating a model.

    Args:
        save_dir (str): directory to save model.
        max_epoch (int): maximum epoch.
        start_epoch (int, optional): starting epoch. Default is 0.
        print_freq (int, optional): print_frequency. Default is 10.
        fixbase_epoch (int, optional): number of epochs to train ``open_layers`` (new layers)
            while keeping base layers frozen. Default is 0. ``fixbase_epoch`` is counted
            in ``max_epoch``.
        open_layers (str or list, optional): layers (attribute names) open for training.
        start_eval (int, optional): from which epoch to start evaluation. Default is 0.
        eval_freq (int, optional): evaluation frequency. Default is -1 (meaning evaluation
            is only performed at the end of training).
        dist_metric (str, optional): distance metric used to compute distance matrix
            between query and gallery. Default is "euclidean".
        normalize_feature (bool, optional): performs L2 normalization on feature vectors
            before computing feature distance. Default is False.
        visrank (bool, optional): visualizes ranked results. Default is False. It is
            recommended to enable ``visrank`` when ``test_only`` is True. The ranked
            images will be saved to "save_dir/visrank_dataset", e.g.
            "save_dir/visrank_market1501".
        visrank_topk (int, optional): top-k ranked images to be visualized. Default is 10.
        use_metric_cuhk03 (bool, optional): use single-gallery-shot setting for cuhk03.
            Default is False. This should be enabled when using cuhk03 classic split.
        ranks (list, optional): cmc ranks to be computed. Default is [1, 5, 10, 20].
        rerank (bool, optional): uses person re-ranking (by Zhong et al. CVPR'17).
            Default is False. This is only enabled when test_only=True.

    Returns:
        tuple: (last computed accuracy, ``self.best_metric``).
    """
    if lr_finder:
        # LR search mode: configure the finder and keep a model backup so
        # the original weights can be restored afterwards.
        self.configure_lr_finder(trial, lr_finder)
        self.backup_model()

    self.writer = tb_writer
    time_start = time.time()
    self.start_epoch = start_epoch
    self.max_epoch = max_epoch
    assert start_epoch != max_epoch, "the last epoch number cannot be equal the start one"
    if self.early_stopping or self.target_metric == 'test_acc':
        assert eval_freq == 1, "early stopping works only with evaluation on each epoch"
    self.fixbase_epoch = fixbase_epoch
    test_acc = AverageMeter()
    accuracy, should_save_ema_model = 0, False
    print('=> Start training')

    if perf_monitor and not lr_finder:
        perf_monitor.on_train_begin()
    for self.epoch in range(self.start_epoch, self.max_epoch):
        # change the NumPy's seed at every epoch
        np.random.seed(initial_seed + self.epoch)
        if perf_monitor and not lr_finder:
            perf_monitor.on_epoch_begin(self.epoch)
        # NNCF model-compression scheduling, when compression is enabled.
        if self.compression_ctrl is not None:
            self.compression_ctrl.scheduler.epoch_step(self.epoch)

        avg_loss = self.train(print_freq=print_freq,
                              fixbase_epoch=fixbase_epoch,
                              open_layers=open_layers,
                              lr_finder=lr_finder,
                              perf_monitor=perf_monitor,
                              stop_callback=stop_callback)

        if self.compression_ctrl is not None:
            statistics = self.compression_ctrl.statistics()
            print(statistics.to_str())
            if self.writer is not None and not lr_finder:
                for key, value in get_nncf_prepare_for_tensorboard()(
                        statistics).items():
                    self.writer.add_scalar(
                        "compression/statistics/{0}".format(key),
                        value,
                        len(self.train_loader) * self.epoch)

        if stop_callback and stop_callback.check_stop():
            break

        # Evaluate on the configured schedule, and always on the last epoch.
        if (((self.epoch + 1) >= start_eval and eval_freq > 0
             and (self.epoch + 1) % eval_freq == 0
             and (self.epoch + 1) != self.max_epoch)
                or self.epoch == (self.max_epoch - 1)):
            accuracy, should_save_ema_model = self.test(
                self.epoch,
                dist_metric=dist_metric,
                normalize_feature=normalize_feature,
                visrank=visrank,
                visrank_topk=visrank_topk,
                save_dir=save_dir,
                use_metric_cuhk03=use_metric_cuhk03,
                ranks=ranks,
                lr_finder=lr_finder,
            )
            # update test_acc AverageMeter only if the accuracy is better
            # than the average
            if accuracy >= test_acc.avg:
                test_acc.update(accuracy)

        target_metric = test_acc.avg if self.target_metric == 'test_acc' else avg_loss

        if perf_monitor and not lr_finder:
            perf_monitor.on_epoch_end(self.epoch, accuracy)

        if not lr_finder and not self.per_batch_annealing:
            self.update_lr(output_avg_metric=target_metric)

        if lr_finder:
            print(
                f"epoch: {self.epoch}\t accuracy: {accuracy}\t lr: {self.get_current_lr()}"
            )
            if trial:
                trial.report(accuracy, self.epoch)
                if trial.should_prune():
                    # restore model before pruning
                    self.restore_model()
                    raise optuna.exceptions.TrialPruned()

        if not lr_finder:
            # use smooth (average) accuracy metric for early stopping if the
            # target metric is accuracy
            should_exit, is_candidate_for_best = self.exit_on_plateau_and_choose_best(
                accuracy)
            should_exit = self.early_stopping and should_exit
            if self.save_all_chkpts:
                self.save_model(
                    self.epoch,
                    save_dir,
                    is_best=is_candidate_for_best,
                    should_save_ema_model=should_save_ema_model)
            elif is_candidate_for_best:
                self.save_model(
                    0,
                    save_dir,
                    is_best=is_candidate_for_best,
                    should_save_ema_model=should_save_ema_model)

            if should_exit:
                # When compressing, only stop early once compression has
                # fully converged.
                if self.compression_ctrl is None or \
                        (self.compression_ctrl is not None
                         and self.compression_ctrl.compression_stage
                         == get_nncf_complession_stage().FULLY_COMPRESSED):
                    break

    if perf_monitor and not lr_finder:
        perf_monitor.on_train_end()
    if lr_finder and lr_finder.mode != 'fast_ai':
        self.restore_model()

    elapsed = round(time.time() - time_start)
    elapsed = str(datetime.timedelta(seconds=elapsed))
    print('Elapsed {}'.format(elapsed))
    if self.writer is not None:
        self.writer.close()

    return accuracy, self.best_metric
def train(
    self,
    epoch,
    max_epoch,
    writer,
    fixbase_epoch=0,
    open_layers=None,
    print_freq=10
):
    """Train the model for one epoch with a temperature-annealed softmax.

    Args:
        epoch (int): current epoch (0-based).
        max_epoch (int): total number of epochs, used only for the ETA estimate.
        writer: tensorboard ``SummaryWriter`` or ``None`` (logging is skipped when None).
        fixbase_epoch (int, optional): number of initial epochs during which only
            ``open_layers`` are trained.
        open_layers (str or list, optional): layers kept trainable during the
            fixed-base warm-up phase.
        print_freq (int, optional): console logging period in batches.

    NOTE(review): reconstructed from a whitespace-collapsed source; the
    tensorboard block is assumed to sit at loop level (outside the
    ``print_freq`` guard), matching the sibling engines in this file — confirm.
    """
    losses = AverageMeter()
    accs = AverageMeter()
    batch_time = AverageMeter()
    data_time = AverageMeter()

    self.model.train()
    # Warm-up: while (epoch+1) <= fixbase_epoch, freeze everything except open_layers.
    if (epoch + 1) <= fixbase_epoch and open_layers is not None:
        print(
            '* Only train {} (epoch: {}/{})'.format(
                open_layers, epoch + 1, fixbase_epoch
            )
        )
        open_specified_layers(self.model, open_layers)
    else:
        open_all_layers(self.model)

    num_batches = len(self.train_loader)
    end = time.time()
    for batch_idx, data in enumerate(self.train_loader):
        data_time.update(time.time() - end)

        imgs, pids = self._parse_data_for_train(data)
        if self.use_gpu:
            imgs = imgs.cuda()
            pids = pids.cuda()

        # softmax temporature
        # Decay lmda geometrically every lmda_decay_step epochs unless it is
        # fixed (or decay disabled via lmda_decay_step == -1); floor at min_lmda.
        if self.fixed_lmda or self.lmda_decay_step == -1:
            lmda = self.init_lmda
        else:
            lmda = self.init_lmda * self.lmda_decay_rate**(
                epoch // self.lmda_decay_step
            )
            if lmda < self.min_lmda:
                lmda = self.min_lmda

        # Monte-Carlo iterations: several optimizer steps on the same batch.
        for k in range(self.mc_iter):
            outputs = self.model(imgs, lmda=lmda)
            loss = self._compute_loss(self.criterion, outputs, pids)
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

        batch_time.update(time.time() - end)
        # Meters record only the last MC iteration's loss/accuracy.
        losses.update(loss.item(), pids.size(0))
        accs.update(metrics.accuracy(outputs, pids)[0].item())

        if (batch_idx+1) % print_freq == 0:
            # estimate remaining time
            eta_seconds = batch_time.avg * (
                num_batches - (batch_idx+1) + (max_epoch - (epoch+1)) * num_batches
            )
            eta_str = str(datetime.timedelta(seconds=int(eta_seconds)))
            print(
                'Epoch: [{0}/{1}][{2}/{3}]\t'
                'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                'Acc {acc.val:.2f} ({acc.avg:.2f})\t'
                'Lr {lr:.6f}\t'
                'eta {eta}'.format(
                    epoch + 1,
                    max_epoch,
                    batch_idx + 1,
                    num_batches,
                    batch_time=batch_time,
                    data_time=data_time,
                    loss=losses,
                    acc=accs,
                    lr=self.optimizer.param_groups[0]['lr'],
                    eta=eta_str
                )
            )

        if writer is not None:
            n_iter = epoch*num_batches + batch_idx
            writer.add_scalar('Train/Time', batch_time.avg, n_iter)
            writer.add_scalar('Train/Data', data_time.avg, n_iter)
            writer.add_scalar('Train/Loss', losses.avg, n_iter)
            writer.add_scalar('Train/Acc', accs.avg, n_iter)
            writer.add_scalar(
                'Train/Lr', self.optimizer.param_groups[0]['lr'], n_iter
            )

        end = time.time()

    if self.scheduler is not None:
        self.scheduler.step()
def train(self, epoch, trainloader, fixbase=False, open_layers=None, print_freq=10):
    """Trains the model for one epoch on source datasets using softmax loss.

    Args:
        epoch (int): current epoch.
        trainloader (Dataloader): training dataloader.
        fixbase (bool, optional): whether to fix base layers. Default is False.
        open_layers (str or list, optional): layers open for training.
        print_freq (int, optional): print frequency. Default is 10.
    """
    loss_meter = AverageMeter()
    acc_meter = AverageMeter()
    batch_timer = AverageMeter()
    data_timer = AverageMeter()

    self.model.train()

    # During the fix-base phase only the requested layers receive gradients.
    if fixbase and (open_layers is not None):
        open_specified_layers(self.model, open_layers)
    else:
        open_all_layers(self.model)

    tick = time.time()
    for batch_idx, batch in enumerate(trainloader):
        data_timer.update(time.time() - tick)

        imgs, pids = self._parse_data_for_train(batch)
        if self.use_gpu:
            imgs, pids = imgs.cuda(), pids.cuda()

        # One optimization step per batch.
        self.optimizer.zero_grad()
        outputs = self.model(imgs)
        loss = self._compute_loss(self.criterion, outputs, pids)
        loss.backward()
        self.optimizer.step()

        batch_timer.update(time.time() - tick)
        n_samples = pids.size(0)
        loss_meter.update(loss.item(), n_samples)
        acc_meter.update(metrics.accuracy(outputs, pids)[0].item())

        if (batch_idx + 1) % print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'A-softmax Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Acc {acc.val:.2f} ({acc.avg:.2f})\t'.format(
                      epoch + 1,
                      batch_idx + 1,
                      len(trainloader),
                      batch_time=batch_timer,
                      data_time=data_timer,
                      loss=loss_meter,
                      acc=acc_meter))

        tick = time.time()

    # The LR schedule is frozen while base layers are fixed.
    if (self.scheduler is not None) and (not fixbase):
        self.scheduler.step()
def objective(cfg, args, trial):
    """Optuna objective: trains a model with trial-suggested hyper-parameters.

    Args:
        cfg: experiment config node (modified in place with the suggested lr).
        args: parsed command-line arguments (classes, epochs, gpu_num, ...).
        trial (optuna.Trial): current trial, used to suggest values and report
            intermediate results for pruning.

    Returns:
        float: top-1 accuracy of the last finished epoch.

    Raises:
        optuna.exceptions.TrialPruned: when the pruner decides to stop the trial.
    """
    # Generate the trial's hyper-parameters.
    # g_ = trial.suggest_int("g_", 1, 7)
    # asl_pm = trial.suggest_float("asl_pm", 0, 0.5)
    # m = trial.suggest_float("m", 0.01, 0.7)
    # s = trial.suggest_int("s", 5, 60)
    lr = trial.suggest_float("lr", 0.001, 0.5)
    # t = trial.suggest_int("t", 1, 7)
    # cfg.loss.softmax.m = m
    # cfg.loss.softmax.s = s
    # cfg.loss.asl.p_m = asl_pm
    # cfg.loss.am_binary.amb_t = t
    cfg.train.lr = lr

    # Generate the datamanager.
    num_aux_models = len(cfg.mutual_learning.aux_configs)
    datamanager = build_datamanager(cfg, args.classes)

    # Build the model.
    num_train_classes = datamanager.num_train_pids
    print('Building main model: {}'.format(cfg.model.name))
    model = torchreid.models.build_model(
        **model_kwargs(cfg, num_train_classes))
    aux_lr = cfg.train.lr  # placeholder, needed for aux models, may be filled by nncf part below
    compression_ctrl = None
    should_freeze_aux_models = False
    nncf_metainfo = None
    optimizer = torchreid.optim.build_optimizer(model, **optimizer_kwargs(cfg))
    scheduler = torchreid.optim.build_lr_scheduler(
        optimizer=optimizer,
        num_iter=datamanager.num_iter,
        **lr_scheduler_kwargs(cfg))

    # Loading model (and optimizer and scheduler in case of resuming training).
    if cfg.model.load_weights and check_isfile(cfg.model.load_weights):
        load_pretrained_weights(model, cfg.model.load_weights)
    if cfg.model.type == 'classification':
        check_classification_classes(
            model, datamanager, args.classes, test_only=cfg.test.evaluate)

    model, extra_device_ids = put_main_model_on_the_device(
        model, cfg.use_gpu, args.gpu_num, num_aux_models, args.split_models)

    if num_aux_models > 0:
        print(
            f'Enabled mutual learning between {len(cfg.mutual_learning.aux_configs) + 1} models.'
        )
        models, optimizers, schedulers = [model], [optimizer], [scheduler]
        for config_file, device_ids in zip(cfg.mutual_learning.aux_configs,
                                           extra_device_ids):
            aux_model, aux_optimizer, aux_scheduler = build_auxiliary_model(
                config_file, num_train_classes, cfg.use_gpu, device_ids,
                num_iter=datamanager.num_iter, lr=aux_lr,
                aux_config_opts=args.aux_config_opts)
            models.append(aux_model)
            optimizers.append(aux_optimizer)
            schedulers.append(aux_scheduler)
    else:
        models, optimizers, schedulers = model, optimizer, scheduler

    print(f'Building {cfg.loss.name}-engine')
    engine = build_engine(cfg, datamanager, models, optimizers, schedulers,
                          should_freeze_aux_models=should_freeze_aux_models,
                          nncf_metainfo=nncf_metainfo,
                          compression_ctrl=compression_ctrl,
                          initial_lr=aux_lr)

    test_acc = AverageMeter()
    obj = 0
    engine.start_epoch = 0
    engine.max_epoch = args.epochs
    print(f"\nnext trial with [lr: {lr}]")
    for engine.epoch in range(args.epochs):
        # Re-seed NumPy each epoch so augmentation streams are reproducible.
        np.random.seed(cfg.train.seed + engine.epoch)
        avg_loss = engine.train(print_freq=20000,
                                fixbase_epoch=0,
                                open_layers=None,
                                lr_finder=False,
                                perf_monitor=None,
                                stop_callback=None)
        top1, _ = engine.test(
            engine.epoch,
            lr_finder=False,
        )

        test_acc.update(top1)
        smooth_top1 = test_acc.avg
        target_metric = smooth_top1 if engine.target_metric == 'test_acc' else avg_loss
        obj = top1
        if not engine.per_batch_annealing:
            engine.update_lr(output_avg_metric=target_metric)

        trial.report(obj, engine.epoch)
        # Handle pruning based on the intermediate value.
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

        should_exit, _ = engine.exit_on_plateau_and_choose_best(
            top1, smooth_top1)
        # BUG FIX: was `engine.early_stoping` (typo) which raises
        # AttributeError — the engine attribute is `early_stopping`
        # (see its run() implementation above in this file).
        should_exit = engine.early_stopping and should_exit
        if should_exit:
            break

    return obj
class Engine(object):
    r"""A generic base Engine class for both image- and video-reid.

    Args:
        datamanager (DataManager): an instance of ``torchreid.data.ImageDataManager``
            or ``torchreid.data.VideoDataManager``.
        model (nn.Module): model instance.
        optimizer (Optimizer): an Optimizer.
        scheduler (LRScheduler, optional): if None, no learning rate decay will be performed.
        use_gpu (bool, optional): use gpu. Default is True.
    """

    def __init__(self, datamanager, model, optimizer=None, scheduler=None,
                 use_gpu=True):
        self.datamanager = datamanager
        self.model = model
        self.optimizer = optimizer
        self.scheduler = scheduler
        self.use_gpu = (torch.cuda.is_available() and use_gpu)
        self.writer = None
        # Running averages of test metrics across successive evaluations.
        self.map_v = AverageMeter()
        self.rank1_v = AverageMeter()

        # check attributes
        if not isinstance(self.model, nn.Module):
            raise TypeError('model must be an instance of nn.Module')

    def run(self, save_dir='log', max_epoch=0, start_epoch=0, fixbase_epoch=0,
            open_layers=None, start_eval=0, eval_freq=-1, test_only=False,
            print_freq=10, dist_metric='euclidean', normalize_feature=False,
            visrank=False, visrank_topk=10, use_metric_cuhk03=False,
            ranks=[1, 5, 10, 20], rerank=False, visactmap=False):
        r"""A unified pipeline for training and evaluating a model.

        Args:
            save_dir (str): directory to save model.
            max_epoch (int): maximum epoch.
            start_epoch (int, optional): starting epoch. Default is 0.
            fixbase_epoch (int, optional): number of epochs to train ``open_layers``
                (new layers) while keeping base layers frozen. Default is 0.
                ``fixbase_epoch`` is counted in ``max_epoch``.
            open_layers (str or list, optional): layers (attribute names) open for
                training.
            start_eval (int, optional): from which epoch to start evaluation.
                Default is 0.
            eval_freq (int, optional): evaluation frequency. Default is -1 (meaning
                evaluation is only performed at the end of training).
            test_only (bool, optional): if True, only runs evaluation on test
                datasets. Default is False.
            print_freq (int, optional): print_frequency. Default is 10.
            dist_metric (str, optional): distance metric used to compute distance
                matrix between query and gallery. Default is "euclidean".
            normalize_feature (bool, optional): performs L2 normalization on feature
                vectors before computing feature distance. Default is False.
            visrank (bool, optional): visualizes ranked results. Default is False.
                It is recommended to enable ``visrank`` when ``test_only`` is True.
                The ranked images will be saved to "save_dir/visrank_dataset",
                e.g. "save_dir/visrank_market1501".
            visrank_topk (int, optional): top-k ranked images to be visualized.
                Default is 10.
            use_metric_cuhk03 (bool, optional): use single-gallery-shot setting for
                cuhk03. Default is False. This should be enabled when using cuhk03
                classic split.
            ranks (list, optional): cmc ranks to be computed. Default is
                [1, 5, 10, 20]. (Read-only; the mutable default is never mutated.)
            rerank (bool, optional): uses person re-ranking (by Zhong et al.
                CVPR'17). Default is False. This is only enabled when
                test_only=True.
            visactmap (bool, optional): visualizes activation maps. Default is
                False.
        """
        trainloader, testloader = self.datamanager.return_dataloaders()

        if visrank and not test_only:
            raise ValueError('visrank=True is valid only if test_only=True')

        if test_only:
            self.test(0,
                      testloader,
                      dist_metric=dist_metric,
                      normalize_feature=normalize_feature,
                      visrank=visrank,
                      visrank_topk=visrank_topk,
                      save_dir=save_dir,
                      use_metric_cuhk03=use_metric_cuhk03,
                      ranks=ranks,
                      rerank=rerank)
            return

        if self.writer is None:
            self.writer = SummaryWriter(log_dir=save_dir)

        if visactmap:
            self.visactmap(testloader, save_dir, self.datamanager.width,
                           self.datamanager.height, print_freq)
            return

        time_start = time.time()
        print('=> Start training')

        for epoch in range(start_epoch, max_epoch):
            self.train(epoch, max_epoch, trainloader, fixbase_epoch,
                       open_layers, print_freq)

            if (epoch + 1) >= start_eval and eval_freq > 0 and (
                    epoch + 1) % eval_freq == 0 and (epoch + 1) != max_epoch:
                rank1 = self.test(epoch,
                                  testloader,
                                  dist_metric=dist_metric,
                                  normalize_feature=normalize_feature,
                                  visrank=visrank,
                                  visrank_topk=visrank_topk,
                                  save_dir=save_dir,
                                  use_metric_cuhk03=use_metric_cuhk03,
                                  ranks=ranks)
                # self._save_checkpoint(epoch, rank1, save_dir)

        # NOTE(review): if start_epoch == max_epoch > 0 the loop never runs and
        # `epoch` is unbound here — callers always pass start_epoch < max_epoch.
        if max_epoch > 0:
            print('=> Final test')
            rank1 = self.test(epoch,
                              testloader,
                              dist_metric=dist_metric,
                              normalize_feature=normalize_feature,
                              visrank=visrank,
                              visrank_topk=visrank_topk,
                              save_dir=save_dir,
                              use_metric_cuhk03=use_metric_cuhk03,
                              ranks=ranks)
            # self._save_checkpoint(epoch, rank1, save_dir)

        elapsed = round(time.time() - time_start)
        elapsed = str(datetime.timedelta(seconds=elapsed))
        print('Elapsed {}'.format(elapsed))

        # BUG FIX: was `if self.writer is None: self.writer.close()`, which
        # calls .close() on None and never closes a live SummaryWriter.
        if self.writer is not None:
            self.writer.close()

    def train(self):
        r"""Performs training on source datasets for one epoch.

        This will be called every epoch in ``run()``, e.g.

        .. code-block:: python

            for epoch in range(start_epoch, max_epoch):
                self.train(some_arguments)

        .. note::

            This must be implemented in subclasses.
        """
        raise NotImplementedError

    def test(self, epoch, testloader, dist_metric='euclidean',
             normalize_feature=False, visrank=False, visrank_topk=10,
             save_dir='', use_metric_cuhk03=False, ranks=[1, 5, 10, 20],
             rerank=False):
        r"""Tests model on target datasets.

        .. note::

            This function has been called in ``run()``.

        .. note::

            The test pipeline implemented in this function suits both image- and
            video-reid. In general, a subclass of Engine only needs to
            re-implement ``_extract_features()`` and ``_parse_data_for_eval()``
            (most of the time), but not a must. Please refer to the source code
            for more details.

        Returns the rank-1 accuracy of the last evaluated target dataset.
        """
        targets = list(testloader.keys())

        for name in targets:
            domain = 'source' if name in self.datamanager.sources else 'target'
            print('##### Evaluating {} ({}) #####'.format(name, domain))
            queryloader = testloader[name]['query']
            galleryloader = testloader[name]['gallery']
            rank1 = self._evaluate(epoch,
                                   dataset_name=name,
                                   queryloader=queryloader,
                                   galleryloader=galleryloader,
                                   dist_metric=dist_metric,
                                   normalize_feature=normalize_feature,
                                   visrank=visrank,
                                   visrank_topk=visrank_topk,
                                   save_dir=save_dir,
                                   use_metric_cuhk03=use_metric_cuhk03,
                                   ranks=ranks,
                                   rerank=rerank)

        return rank1

    @torch.no_grad()
    def _evaluate(self, epoch, dataset_name='', queryloader=None,
                  galleryloader=None, dist_metric='euclidean',
                  normalize_feature=False, visrank=False, visrank_topk=10,
                  save_dir='', use_metric_cuhk03=False, ranks=[1, 5, 10, 20],
                  rerank=False):
        """Extracts query/gallery features, computes CMC and mAP for one dataset."""
        batch_time = AverageMeter()

        print('Extracting features from query set ...')
        qf, q_pids, q_camids = [], [], [
        ]  # query features, query person IDs and query camera IDs
        for batch_idx, data in enumerate(queryloader):
            imgs, pids, camids = self._parse_data_for_eval(data)
            if self.use_gpu:
                imgs = imgs.cuda()
            end = time.time()
            features = self._extract_features(imgs)
            batch_time.update(time.time() - end)
            features = features.data.cpu()
            qf.append(features)
            q_pids.extend(pids)
            q_camids.extend(camids)
        qf = torch.cat(qf, 0)
        q_pids = np.asarray(q_pids)
        q_camids = np.asarray(q_camids)
        print('Done, obtained {}-by-{} matrix'.format(qf.size(0), qf.size(1)))

        print('Extracting features from gallery set ...')
        gf, g_pids, g_camids = [], [], [
        ]  # gallery features, gallery person IDs and gallery camera IDs
        end = time.time()
        for batch_idx, data in enumerate(galleryloader):
            imgs, pids, camids = self._parse_data_for_eval(data)
            if self.use_gpu:
                imgs = imgs.cuda()
            end = time.time()
            features = self._extract_features(imgs)
            batch_time.update(time.time() - end)
            features = features.data.cpu()
            gf.append(features)
            g_pids.extend(pids)
            g_camids.extend(camids)
        gf = torch.cat(gf, 0)
        g_pids = np.asarray(g_pids)
        g_camids = np.asarray(g_camids)
        print('Done, obtained {}-by-{} matrix'.format(gf.size(0), gf.size(1)))

        print('Speed: {:.4f} sec/batch'.format(batch_time.avg))

        if normalize_feature:
            print('Normalzing features with L2 norm ...')
            qf = F.normalize(qf, p=2, dim=1)
            gf = F.normalize(gf, p=2, dim=1)

        print(
            'Computing distance matrix with metric={} ...'.format(dist_metric))
        distmat = metrics.compute_distance_matrix(qf, gf, dist_metric)
        distmat = distmat.numpy()

        if rerank:
            print('Applying person re-ranking ...')
            distmat_qq = metrics.compute_distance_matrix(qf, qf, dist_metric)
            distmat_gg = metrics.compute_distance_matrix(gf, gf, dist_metric)
            distmat = re_ranking(distmat, distmat_qq, distmat_gg)

        print('Computing CMC and mAP ...')
        cmc, mAP = metrics.evaluate_rank(distmat,
                                         q_pids,
                                         g_pids,
                                         q_camids,
                                         g_camids,
                                         use_metric_cuhk03=use_metric_cuhk03)

        print('** Results **')
        print('mAP: {:.1%}'.format(mAP))
        print('CMC curve')
        for r in ranks:
            print('Rank-{:<3}: {:.1%}'.format(r, cmc[r - 1]))

        self.map_v.update(mAP)
        self.rank1_v.update(cmc[0])
        # BUG FIX: guard the writer — in the test_only path run() calls test()
        # before a SummaryWriter is ever created, so self.writer may be None.
        if self.writer is not None:
            self.writer.add_scalar('Test/mAP', self.map_v.avg, epoch)
            self.writer.add_scalar('Test/rank1', self.rank1_v.avg, epoch)

        if visrank:
            visualize_ranked_results(
                distmat,
                self.datamanager.return_testdataset_by_name(dataset_name),
                self.datamanager.data_type,
                width=self.datamanager.width,
                height=self.datamanager.height,
                save_dir=osp.join(save_dir, 'visrank_' + dataset_name),
                topk=visrank_topk)

        return cmc[0]

    @torch.no_grad()
    def visactmap(self, testloader, save_dir, width, height, print_freq):
        """Visualizes CNN activation maps to see where the CNN focuses on to
        extract features.

        This function takes as input the query images of target datasets

        Reference:
            - Zagoruyko and Komodakis. Paying more attention to attention: Improving the
              performance of convolutional neural networks via attention transfer. ICLR, 2017
            - Zhou et al. Omni-Scale Feature Learning for Person Re-Identification. ICCV, 2019.
        """
        self.model.eval()
        imagenet_mean = [0.485, 0.456, 0.406]
        imagenet_std = [0.229, 0.224, 0.225]

        for target in list(testloader.keys()):
            queryloader = testloader[target]['query']
            # original images and activation maps are saved individually
            actmap_dir = osp.join(save_dir, 'actmap_' + target)
            mkdir_if_missing(actmap_dir)
            print('Visualizing activation maps for {} ...'.format(target))

            for batch_idx, data in enumerate(queryloader):
                imgs, paths = data[0], data[3]
                if self.use_gpu:
                    imgs = imgs.cuda()

                # forward to get convolutional feature maps
                try:
                    outputs = self.model(imgs, return_featuremaps=True)
                except TypeError:
                    raise TypeError('forward() got unexpected keyword argument "return_featuremaps". ' \
                                    'Please add return_featuremaps as an input argument to forward(). When ' \
                                    'return_featuremaps=True, return feature maps only.')

                if outputs.dim() != 4:
                    raise ValueError('The model output is supposed to have '
                                     'shape of (b, c, h, w), i.e. 4 dimensions, but got {} dimensions. '
                                     'Please make sure you set the model output at eval mode '
                                     'to be the last convolutional feature maps'.format(outputs.dim()))

                # compute activation maps
                outputs = (outputs**2).sum(1)
                b, h, w = outputs.size()
                outputs = outputs.view(b, h * w)
                outputs = F.normalize(outputs, p=2, dim=1)
                outputs = outputs.view(b, h, w)

                if self.use_gpu:
                    imgs, outputs = imgs.cpu(), outputs.cpu()

                for j in range(outputs.size(0)):
                    # get image name
                    path = paths[j]
                    imname = osp.basename(osp.splitext(path)[0])

                    # RGB image
                    img = imgs[j, ...]
                    for t, m, s in zip(img, imagenet_mean, imagenet_std):
                        t.mul_(s).add_(m).clamp_(0, 1)
                    img_np = np.uint8(np.floor(img.numpy() * 255))
                    img_np = img_np.transpose(
                        (1, 2, 0))  # (c, h, w) -> (h, w, c)

                    # activation map
                    am = outputs[j, ...].numpy()
                    am = cv2.resize(am, (width, height))
                    # BUG FIX: was `am - np.max(am)`, which maps into [-255, 0]
                    # and wraps around under np.uint8; min-max scale to [0, 255].
                    am = 255 * (am - np.min(am)) / (np.max(am) - np.min(am) + 1e-12)
                    am = np.uint8(np.floor(am))
                    am = cv2.applyColorMap(am, cv2.COLORMAP_JET)

                    # overlapped
                    overlapped = img_np * 0.3 + am * 0.7
                    overlapped[overlapped > 255] = 255
                    overlapped = overlapped.astype(np.uint8)

                    # save images in a single figure (add white spacing between images)
                    # from left to right: original image, activation map, overlapped image
                    grid_img = 255 * np.ones(
                        (height, 3 * width + 2 * GRID_SPACING, 3),
                        dtype=np.uint8)
                    grid_img[:, :width, :] = img_np[:, :, ::-1]
                    grid_img[:, width + GRID_SPACING:2 * width +
                             GRID_SPACING, :] = am
                    grid_img[:, 2 * width + 2 * GRID_SPACING:, :] = overlapped
                    cv2.imwrite(osp.join(actmap_dir, imname + '.jpg'),
                                grid_img)

                if (batch_idx + 1) % print_freq == 0:
                    print('- done batch {}/{}'.format(batch_idx + 1,
                                                      len(queryloader)))

    def _compute_loss(self, criterion, outputs, targets):
        # Tuple/list outputs come from deeply-supervised heads; apply the
        # criterion to each head, otherwise directly.
        if isinstance(outputs, (tuple, list)):
            loss = DeepSupervision(criterion, outputs, targets)
        else:
            loss = criterion(outputs, targets)
        return loss

    def _extract_features(self, input):
        self.model.eval()
        return self.model(input)

    def _parse_data_for_train(self, data):
        # data layout: (imgs, pids, ...)
        imgs = data[0]
        pids = data[1]
        return imgs, pids

    def _parse_data_for_eval(self, data):
        # data layout: (imgs, pids, camids, ...)
        imgs = data[0]
        pids = data[1]
        camids = data[2]
        return imgs, pids, camids

    def _save_checkpoint(self, epoch, rank1, save_dir, is_best=False):
        save_checkpoint(
            {
                'state_dict': self.model.state_dict(),
                'epoch': epoch + 1,
                'rank1': rank1,
                'optimizer': self.optimizer.state_dict(),
            },
            save_dir,
            is_best=is_best)
def train(
    self,
    epoch,
    max_epoch,
    writer,
    print_freq=10,
    fixbase_epoch=0,
    open_layers=None,
):
    """Runs one epoch of joint source/target domain-adaptation training.

    Combines an MGN identity loss on source and target batches, MSE
    reconstruction losses, BCE occlusion-prediction losses on synthetic
    RandomErasing occlusions, and (after epoch 10) MMD alignment losses.
    After the epoch, optionally plots within/between-class distance
    distributions for both domains.

    NOTE(review): this body was reconstructed from whitespace-collapsed
    source; block boundaries around the `epoch > 10` branch are assumed —
    as reconstructed, no optimizer step happens for epochs <= 10 (only
    forward passes and meter updates). Confirm against the original repo.
    """
    losses_triplet = AverageMeter()
    losses_softmax = AverageMeter()
    losses_mmd_bc = AverageMeter()
    losses_mmd_wc = AverageMeter()
    losses_mmd_global = AverageMeter()
    losses_recons = AverageMeter()
    batch_time = AverageMeter()
    data_time = AverageMeter()

    self.model.train()
    self.mgn_targetPredict.train()
    if (epoch + 1) <= fixbase_epoch and open_layers is not None:
        print(
            '* Only train {} (epoch: {}/{})'.format(
                open_layers, epoch + 1, fixbase_epoch
            )
        )
        open_specified_layers(self.model, open_layers)
    else:
        open_all_layers(self.model)
        open_all_layers(self.mgn_targetPredict)
        print("All open layers!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")

    num_batches = len(self.train_loader)
    end = time.time()
    # ------------------------------------------------------------------ #
    # Iterate source and target loaders in lockstep (zip truncates to the
    # shorter of the two).
    for batch_idx, (data, data_t) in enumerate(zip(self.train_loader, self.train_loader_t)):
        data_time.update(time.time() - end)

        imgs, pids = self._parse_data_for_train(data)
        imgs_clean = imgs.clone().cuda()
        lam=0
        imgs_t, pids_t = self._parse_data_for_train(data_t)
        imagest_orig=imgs_t.cuda()
        labels=[]
        labelss=[]
        # Random donor images used by RandomErasing to paste patches from.
        random_indexS = np.random.randint(0, imgs.size()[0])
        random_indexT = np.random.randint(0, imgs_t.size()[0])

        # Curriculum of synthetic occlusions: erased-area range (sl, sh)
        # grows with the epoch; `p` flags whether an image was occluded and
        # becomes the BCE occlusion label.
        if epoch > 10 and epoch < 35:
            for i, img in enumerate(imgs):
                randmt = RandomErasing(probability=0.5,sl=0.07, sh=0.22)
                imgs[i],p = randmt(img, imgs[random_indexS])
                labelss.append(p)
        if epoch >= 35:
            randmt = RandomErasing(probability=0.5,sl=0.1, sh=0.25)
            for i, img in enumerate(imgs):
                imgs[i],p = randmt(img,imgs[random_indexS])
                labelss.append(p)
        if epoch > 10 and epoch < 35:
            randmt = RandomErasing(probability=0.5,sl=0.1, sh=0.2)
            for i, img in enumerate(imgs_t):
                imgs_t[i],p = randmt(img,imgs_t[random_indexT])
                labels.append(p)
        if epoch >= 35 and epoch < 75:
            randmt = RandomErasing(probability=0.5,sl=0.2, sh=0.3)
            for i, img in enumerate(imgs_t):
                imgs_t[i],p = randmt(img,imgs_t[random_indexT])
                labels.append(p)
        if epoch >= 75:
            randmt = RandomErasing(probability=0.5,sl=0.2, sh=0.35)
            for i, img in enumerate(imgs_t):
                imgs_t[i],p = randmt(img,imgs_t[random_indexT])
                labels.append(p)

        # NOTE(review): for epoch <= 10 `labels`/`labelss` stay empty, so
        # these become empty tensors — confirm the BCE losses tolerate that.
        binary_labels = torch.tensor(np.asarray(labels)).cuda()
        binary_labelss = torch.tensor(np.asarray(labelss)).cuda()

        if self.use_gpu:
            imgs = imgs.cuda()
            pids = pids.cuda()
        if self.use_gpu:
            # NOTE(review): `imgs_transformed` is assigned but never used.
            imgs_transformed = imgs_t.cuda()

        self.optimizer.zero_grad()
        # NOTE(review): overwrites the clean clone made above, so the
        # reconstruction target is the occluded source batch.
        imgs_clean = imgs
        outputs, output2, recons,bcc1, bocc2,bocc3 = self.model(imgs)
        # Occlusion-classification losses for the three source-branch heads.
        occ_losss1 = self.BCE_criterion(bcc1.squeeze(1),binary_labelss.float() )
        occ_losss2 = self.BCE_criterion(bocc2.squeeze(1),binary_labelss.float() )
        occ_losss3 = self.BCE_criterion(bocc3.squeeze(1),binary_labelss.float() )
        occ_s = occ_losss1 +occ_losss2+occ_losss3

        ##############CUT MIX#################################3333
        """bbx1, bby1, bbx2, bby2 = self.rand_bbox(imgs.size(), lam)
        rand_index = torch.randperm(imgs.size()[0]).cuda()
        imgs[:, :, bbx1:bbx2, bby1:bby2] = imgs[rand_index, :, bbx1:bbx2, bby1:bby2]
        targeta = pids
        targetb = pids[rand_index]"""
        ##############CUT MIX#################################3333

        outputs_t, output2_t, recons_t,bocct1, bocct2,bocct3 = self.model(imagest_orig)
        outputs_t = self.mgn_targetPredict(output2_t)
        loss_reconst=self.criterion_mse(recons_t, imagest_orig)
        loss_recons=self.criterion_mse(recons, imgs_clean)
        # Occlusion-classification losses for the target branch heads.
        occ_loss1 = self.BCE_criterion(bocct1.squeeze(1),binary_labels.float() )
        occ_loss2 = self.BCE_criterion(bocct2.squeeze(1),binary_labels.float() )
        occ_loss3 = self.BCE_criterion(bocct3.squeeze(1),binary_labels.float() )
        occ_t = occ_loss1 + occ_loss2 + occ_loss3
        pids_t = pids_t.cuda()
        loss_x = self.mgn_loss(outputs, pids)
        loss_x_t = self.mgn_loss(outputs_t, pids_t)
        #loss_x_t = self._compute_loss(self.criterion_x, y, targeta) #*lam + self._compute_loss(self.criterion_x, y, targetb)*(1-lam)
        #loss_t_t = self._compute_loss(self.criterion_t, features_t, targeta)*lam + self._compute_loss(self.criterion_t, features_t, targetb)*(1-lam)

        # MMD alignment and the combined objective are only active after a
        # 10-epoch warm-up.
        if epoch > 10:
            loss_mmd_wc, loss_mmd_bc, loss_mmd_global = self._compute_loss(self.criterion_mmd, outputs[0], outputs_t[0])
            #loss_mmd_wc1, loss_mmd_bc1, loss_mmd_global1 = self._compute_loss(self.criterion_mmd, outputs[2], outputs_t[2])
            #loss_mmd_wc3, loss_mmd_bc3, loss_mmd_global3 = self._compute_loss(self.criterion_mmd, outputs[3], outputs_t[3])
            #loss_mmd_wcf = loss_mmd_wc+loss_mmd_wc1+loss_mmd_wc3
            #loss_mmd_bcf = loss_mmd_bc+loss_mmd_bc1+loss_mmd_bc3
            #loss_mmd_globalf = loss_mmd_global+loss_mmd_global1+loss_mmd_global3
            #print(loss_mmd_bc.item())
            l_joint = 1.5*loss_x_t +loss_x +loss_reconst+loss_recons #self.weight_r*loss_recons+ + loss_x + loss_t
            #loss = loss_t + loss_x + loss_mmd_bc + loss_mmd_wc
            l_d = 0.5*loss_mmd_bc + 0.8*loss_mmd_wc +loss_mmd_global #+loss_mmd_bc1 + loss_mmd_wc1 +loss_mmd_global1 +loss_mmd_bc3 + loss_mmd_wc3 +loss_mmd_global3
            loss = 0.3*l_d + 0.7*l_joint +0.2*occ_t + 0.1*occ_s
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

        # ------------------------------------------------------------------ #

        batch_time.update(time.time() - end)
        #losses_triplet.update(loss_t.item(), pids.size(0))
        losses_softmax.update(loss_x_t.item(), pids.size(0))
        #losses_recons.update(loss_recons.item(), pids.size(0))
        if epoch > 10:
            losses_mmd_bc.update(loss_mmd_bc.item(), pids.size(0))
            losses_mmd_wc.update(loss_mmd_wc.item(), pids.size(0))
            losses_mmd_global.update(loss_mmd_global.item(), pids.size(0))

        if (batch_idx + 1) % print_freq == 0:
            # estimate remaining time
            eta_seconds = batch_time.avg * (
                num_batches - (batch_idx + 1) + (max_epoch - (epoch + 1)) * num_batches
            )
            eta_str = str(datetime.timedelta(seconds=int(eta_seconds)))
            print(
                'Epoch: [{0}/{1}][{2}/{3}]\t'
                'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                #'Loss_t {losses1.val:.4f} ({losses1.avg:.4f})\t'
                'Loss_x {losses2.val:.4f} ({losses2.avg:.4f})\t'
                'Loss_mmd_wc {losses3.val:.4f} ({losses3.avg:.4f})\t'
                'Loss_mmd_bc {losses4.val:.4f} ({losses4.avg:.4f})\t'
                'Loss_mmd_global {losses5.val:.4f} ({losses5.avg:.4f})\t'
                #'Loss_recons {losses6.val:.4f} ({losses6.avg:.4f})\t'
                'eta {eta}'.format(
                    epoch + 1,
                    max_epoch,
                    batch_idx + 1,
                    num_batches,
                    batch_time=batch_time,
                    #losses1=losses_triplet,
                    losses2=losses_softmax,
                    losses3=losses_mmd_wc,
                    losses4=losses_mmd_bc,
                    losses5=losses_mmd_global,
                    #losses6 = losses_recons,
                    eta=eta_str
                )
            )

        # NOTE(review): `writer = None` right before the guard makes the whole
        # tensorboard block below dead code — intentional disable? Confirm.
        writer = None
        if writer is not None:
            n_iter = epoch * num_batches + batch_idx
            writer.add_scalar('Train/Time', batch_time.avg, n_iter)
            writer.add_scalar('Train/Loss_triplet', losses_triplet.avg, n_iter)
            writer.add_scalar('Train/Loss_softmax', losses_softmax.avg, n_iter)
            writer.add_scalar('Train/Loss_mmd_bc', losses_mmd_bc.avg, n_iter)
            writer.add_scalar('Train/Loss_mmd_wc', losses_mmd_wc.avg, n_iter)
            writer.add_scalar('Train/Loss_mmd_global', losses_mmd_global.avg, n_iter)
            writer.add_scalar(
                'Train/Lr', self.optimizer.param_groups[0]['lr'], n_iter
            )

        end = time.time()

    if self.scheduler is not None:
        self.scheduler.step()

    # Diagnostic plots of within-class (wc) / between-class (bc) distance
    # distributions, using the LAST batch's features (outputs/outputs_t leak
    # out of the loop above).
    print_distri = True
    if print_distri:
        instances = self.datamanager.test_loader.query_loader.num_instances
        batch_size = self.datamanager.test_loader.batch_size
        feature_size = outputs[0].size(1) # features_t.shape[1] # 2048
        features_t = outputs_t[0]
        features = outputs[0]
        # Group features as (num_identities, instances, feature_size).
        t = torch.reshape(features_t, (int(batch_size / instances), instances, feature_size))
        # and compute bc/wc euclidean distance
        bct = compute_distance_matrix(t[0], t[0])
        wct = compute_distance_matrix(t[0], t[1])
        for i in t[1:]:
            bct = torch.cat((bct, compute_distance_matrix(i, i)))
            for j in t:
                if j is not i:
                    wct = torch.cat((wct, compute_distance_matrix(i, j)))
        s = torch.reshape(features, (int(batch_size / instances), instances, feature_size))
        bcs = compute_distance_matrix(s[0], s[0])
        wcs = compute_distance_matrix(s[0], s[1])
        for i in s[1:]:
            bcs = torch.cat((bcs, compute_distance_matrix(i, i)))
            for j in s:
                if j is not i:
                    wcs = torch.cat((wcs, compute_distance_matrix(i, j)))
        bcs = bcs.detach()
        wcs = wcs.detach()
        # Drop (near-)zero self-distances before fitting/plotting.
        b_c = [x.cpu().detach().item() for x in bcs.flatten() if x > 0.000001]
        w_c = [x.cpu().detach().item() for x in wcs.flatten() if x > 0.000001]
        data_bc = norm.rvs(b_c)
        sns.distplot(data_bc, bins='auto', fit=norm, kde=False, label='from the same class (within class)')
        data_wc = norm.rvs(w_c)
        sns.distplot(data_wc, bins='auto', fit=norm, kde=False, label='from different class (between class)')
        plt.xlabel('Euclidean distance')
        plt.ylabel('Frequency')
        plt.title('Source Domain')
        plt.legend()
        plt.savefig("Source.png")
        plt.clf()
        b_ct = [x.cpu().detach().item() for x in bct.flatten() if x > 0.1]
        w_ct = [x.cpu().detach().item() for x in wct.flatten() if x > 0.1]
        data_bc = norm.rvs(b_ct)
        sns.distplot(data_bc, bins='auto', fit=norm, kde=False, label='from the same class (within class)')
        data_wc = norm.rvs(w_ct)
        sns.distplot(data_wc, bins='auto', fit=norm, kde=False, label='from different class (between class)')
        plt.xlabel('Euclidean distance')
        plt.ylabel('Frequency')
        plt.title('Target Domain')
        plt.legend()
        plt.savefig("Target.png")
def train(self, epoch, max_epoch, writer, print_freq=10, fixbase_epoch=0, open_layers=None):
    """Runs one epoch of training with the weighted triplet + softmax objective.

    Args:
        epoch (int): current epoch (0-based).
        max_epoch (int): total epoch count, used for the ETA estimate.
        writer: tensorboard ``SummaryWriter`` or None (skips logging).
        print_freq (int, optional): console logging period in batches.
        fixbase_epoch (int, optional): warm-up epochs with frozen base layers.
        open_layers (str or list, optional): layers trained during the warm-up.
    """
    meter_loss_t = AverageMeter()
    meter_loss_x = AverageMeter()
    meter_acc = AverageMeter()
    meter_batch = AverageMeter()
    meter_data = AverageMeter()

    self.model.train()

    # Warm-up phase: only the requested layers receive gradients.
    if (epoch + 1) <= fixbase_epoch and open_layers is not None:
        print('* Only train {} (epoch: {}/{})'.format(
            open_layers, epoch + 1, fixbase_epoch))
        open_specified_layers(self.model, open_layers)
    else:
        open_all_layers(self.model)

    total_batches = len(self.train_loader)
    tick = time.time()
    for batch_idx, batch in enumerate(self.train_loader):
        meter_data.update(time.time() - tick)

        imgs, pids = self._parse_data_for_train(batch)
        if self.use_gpu:
            imgs, pids = imgs.cuda(), pids.cuda()

        # Forward once; the model yields both logits and embedding features.
        self.optimizer.zero_grad()
        outputs, features = self.model(imgs)
        loss_t = self._compute_loss(self.criterion_t, features, pids)
        loss_x = self._compute_loss(self.criterion_x, outputs, pids)
        combined = self.weight_t * loss_t + self.weight_x * loss_x
        combined.backward()
        self.optimizer.step()

        meter_batch.update(time.time() - tick)
        n_samples = pids.size(0)
        meter_loss_t.update(loss_t.item(), n_samples)
        meter_loss_x.update(loss_x.item(), n_samples)
        meter_acc.update(metrics.accuracy(outputs, pids)[0].item())

        if (batch_idx + 1) % print_freq == 0:
            # estimate remaining time
            batches_left = (total_batches - (batch_idx + 1)
                            + (max_epoch - (epoch + 1)) * total_batches)
            eta_str = str(datetime.timedelta(
                seconds=int(meter_batch.avg * batches_left)))
            print('Epoch: [{0}/{1}][{2}/{3}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss_t {loss_t.val:.4f} ({loss_t.avg:.4f})\t'
                  'Loss_x {loss_x.val:.4f} ({loss_x.avg:.4f})\t'
                  'Acc {acc.val:.2f} ({acc.avg:.2f})\t'
                  'Lr {lr:.6f}\t'
                  'eta {eta}'.format(
                      epoch + 1,
                      max_epoch,
                      batch_idx + 1,
                      total_batches,
                      batch_time=meter_batch,
                      data_time=meter_data,
                      loss_t=meter_loss_t,
                      loss_x=meter_loss_x,
                      acc=meter_acc,
                      lr=self.optimizer.param_groups[0]['lr'],
                      eta=eta_str))

        if writer is not None:
            n_iter = epoch * total_batches + batch_idx
            writer.add_scalar('Train/Time', meter_batch.avg, n_iter)
            writer.add_scalar('Train/Data', meter_data.avg, n_iter)
            writer.add_scalar('Train/Loss_t', meter_loss_t.avg, n_iter)
            writer.add_scalar('Train/Loss_x', meter_loss_x.avg, n_iter)
            writer.add_scalar('Train/Acc', meter_acc.avg, n_iter)
            writer.add_scalar('Train/Lr',
                              self.optimizer.param_groups[0]['lr'], n_iter)

        tick = time.time()

    if self.scheduler is not None:
        self.scheduler.step()
def train(epoch, model, criterion, optimizer, scheduler, trainloader, use_gpu):
    """Trains an attribute-recognition model for one epoch.

    Relies on the module-level ``args`` namespace for fixbase_epoch,
    open_layers, print_freq and max_epoch.

    Args:
        epoch (int): current epoch (0-based).
        model (nn.Module): model being trained.
        criterion: attribute loss callable.
        optimizer: optimizer stepping the model parameters.
        scheduler: LR scheduler, stepped once at the end of the epoch.
        trainloader: training dataloader yielding (imgs, attrs, ...).
        use_gpu (bool): move batches to CUDA when True.
    """
    loss_meter = AverageMeter()
    batch_timer = AverageMeter()
    data_timer = AverageMeter()

    model.train()

    # Warm-up: freeze everything but args.open_layers during the first epochs.
    if (epoch + 1) <= args.fixbase_epoch and args.open_layers is not None:
        print('* Only train {} (epoch: {}/{})'.format(
            args.open_layers, epoch + 1, args.fixbase_epoch))
        open_specified_layers(model, args.open_layers)
    else:
        open_all_layers(model)

    tick = time.time()
    for batch_idx, batch in enumerate(trainloader):
        data_timer.update(time.time() - tick)

        imgs, attrs = batch[0], batch[1]
        if use_gpu:
            imgs, attrs = imgs.cuda(), attrs.cuda()

        optimizer.zero_grad()
        loss = criterion(model(imgs), attrs)
        loss.backward()
        optimizer.step()

        batch_timer.update(time.time() - tick)
        loss_meter.update(loss.item(), imgs.size(0))

        if (batch_idx + 1) % args.print_freq == 0:
            # estimate remaining time
            total_batches = len(trainloader)
            batches_left = (total_batches - (batch_idx + 1)
                            + (args.max_epoch - (epoch + 1)) * total_batches)
            eta_str = str(datetime.timedelta(
                seconds=int(batch_timer.avg * batches_left)))
            print('Epoch: [{0}/{1}][{2}/{3}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Lr {lr:.6f}\t'
                  'Eta {eta}'.format(epoch + 1,
                                     args.max_epoch,
                                     batch_idx + 1,
                                     len(trainloader),
                                     batch_time=batch_timer,
                                     data_time=data_timer,
                                     loss=loss_meter,
                                     lr=optimizer.param_groups[0]['lr'],
                                     eta=eta_str))

        tick = time.time()

    scheduler.step()
def train(self, epoch, max_epoch, trainloader, fixbase_epoch=0, open_layers=None, print_freq=10):
    """Run one training epoch combining the main classification loss with an
    optional weight regularizer and an optional metric (embedding) loss.

    When ``self.metric_loss`` is set, the forward pass requests embeddings
    (``get_embeddings=True``) and the metric loss is computed on the
    L2-normalized embeddings. The regularizer is only applied once the
    base-freezing phase is over (``epoch + 1 > fixbase_epoch``).

    Args:
        epoch (int): zero-based index of the current epoch.
        max_epoch (int): total number of epochs (used only for the ETA).
        trainloader: training dataloader.
        fixbase_epoch (int, optional): number of initial epochs training
            only ``open_layers``.
        open_layers (str or list, optional): layers open during fixbase.
        print_freq (int, optional): progress print interval; <= 0 disables.
    """
    losses = AverageMeter()
    reg_ow_loss = AverageMeter()
    metric_loss = AverageMeter()
    accs = AverageMeter()
    batch_time = AverageMeter()
    data_time = AverageMeter()

    self.model.train()
    if (epoch + 1) <= fixbase_epoch and open_layers is not None:
        print('* Only train {} (epoch: {}/{})'.format(open_layers,
                                                      epoch + 1, fixbase_epoch))
        open_specified_layers(self.model, open_layers)
    else:
        open_all_layers(self.model)

    num_batches = len(trainloader)
    start_time = time.time()
    for batch_idx, data in enumerate(trainloader):
        data_time.update(time.time() - start_time)

        imgs, pids = self._parse_data_for_train(data)
        # batch-level augmentation (e.g. mixing) before moving to GPU
        imgs, pids = self._apply_batch_transform(imgs, pids)
        if self.use_gpu:
            imgs = imgs.cuda()
            pids = pids.cuda()

        self.optimizer.zero_grad()
        if self.metric_loss is not None:
            embeddings, outputs = self.model(imgs, get_embeddings=True)
        else:
            outputs = self.model(imgs)
        loss = self._compute_loss(self.criterion, outputs, pids)

        # regularizer is skipped while the base is frozen
        if (epoch + 1) > fixbase_epoch:
            reg_loss = self.regularizer(self.model)
            reg_ow_loss.update(reg_loss.item(), pids.size(0))
            loss += reg_loss

        if self.metric_loss is not None:
            metric_val = self.metric_loss(F.normalize(embeddings, dim=1),
                                          outputs, pids)
            loss += metric_val
            metric_loss.update(metric_val.item(), pids.size(0))

        loss.backward()
        self.optimizer.step()

        losses.update(loss.item(), pids.size(0))
        accs.update(metrics.accuracy(outputs, pids)[0].item())
        batch_time.update(time.time() - start_time)

        if print_freq > 0 and (batch_idx + 1) % print_freq == 0:
            # remaining batches this epoch + all batches of remaining epochs
            eta_seconds = batch_time.avg * (num_batches - (batch_idx + 1)
                                            + (max_epoch - (epoch + 1)) * num_batches)
            eta_str = str(datetime.timedelta(seconds=int(eta_seconds)))
            print('Epoch: [{0}/{1}][{2}/{3}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'AUX Losses {aux_losses.val:.4f} ({aux_losses.avg:.4f})\t'
                  'Acc {acc.val:.2f} ({acc.avg:.2f})\t'
                  'Lr {lr:.6f}\t'
                  'eta {eta}'.format(
                      epoch + 1, max_epoch, batch_idx + 1, num_batches,
                      batch_time=batch_time,
                      data_time=data_time,
                      aux_losses=metric_loss,
                      loss=losses,
                      acc=accs,
                      lr=self.optimizer.param_groups[0]['lr'],
                      eta=eta_str,
                  ))

        if self.writer is not None:
            n_iter = epoch * num_batches + batch_idx
            self.writer.add_scalar('Train/Time', batch_time.avg, n_iter)
            self.writer.add_scalar('Train/Data', data_time.avg, n_iter)
            # criterion may expose auxiliary diagnostics for logging
            info = self.criterion.get_last_info()
            for k in info:
                self.writer.add_scalar('AUX info/' + k, info[k], n_iter)
            self.writer.add_scalar('Loss/train', losses.avg, n_iter)
            if (epoch + 1) > fixbase_epoch:
                self.writer.add_scalar('Loss/reg_ow', reg_ow_loss.avg, n_iter)
            self.writer.add_scalar('Accuracy/train', accs.avg, n_iter)
            self.writer.add_scalar('Learning rate',
                                   self.optimizer.param_groups[0]['lr'], n_iter)
            if self.metric_loss is not None:
                # logs the *current batch* value, unlike the averages above
                self.writer.add_scalar('Loss/local_push_loss',
                                       metric_val.item(), n_iter)

        start_time = time.time()

    if self.scheduler is not None:
        self.scheduler.step()
def test(model, testloader, attr_dict, use_gpu):
    """Evaluate an attribute-recognition model on the test set.

    Thresholds the model's per-attribute probabilities at 0.5 and computes
    instance-based accuracy/precision/recall/F1 plus the label-based mean
    accuracy (mA). When ``args.save_prediction`` is set, per-image
    predictions are appended to ``<args.save_dir>/prediction.txt``.

    Args:
        model: attribute classifier; forward returns per-attribute scores.
        testloader: yields ``(imgs, attrs, img_paths)`` batches.
        attr_dict (dict): maps attribute index to attribute name.
        use_gpu (bool): move images to CUDA before the forward pass.

    Returns:
        tuple: ``(label_mA, ins_acc, ins_prec, ins_rec, ins_f1)``.
    """
    batch_time = AverageMeter()
    model.eval()

    num_persons = 0
    prob_thre = 0.5  # decision threshold on raw probabilities
    ins_acc = 0
    ins_prec = 0
    ins_rec = 0
    mA_history = {
        'correct_pos': 0,
        'real_pos': 0,
        'correct_neg': 0,
        'real_neg': 0
    }

    print('Testing ...')

    for batch_idx, data in enumerate(testloader):
        imgs, attrs, img_paths = data
        print("imgs shape:{}".format(imgs.shape))
        if use_gpu:
            imgs = imgs.cuda()

        end = time.time()
        orig_outputs = model(imgs)
        batch_time.update(time.time() - end)

        orig_outputs = orig_outputs.data.cpu().numpy()
        attrs = attrs.data.numpy()

        # transform raw outputs to attributes (binary codes); keep the raw
        # probabilities in orig_outputs for the optional prediction dump
        outputs = copy.deepcopy(orig_outputs)
        outputs[outputs < prob_thre] = 0
        outputs[outputs >= prob_thre] = 1

        # compute label-based metric (per-attribute TP/TN bookkeeping)
        overlaps = outputs * attrs
        mA_history['correct_pos'] += overlaps.sum(0)
        mA_history['real_pos'] += attrs.sum(0)
        inv_overlaps = (1 - outputs) * (1 - attrs)
        mA_history['correct_neg'] += inv_overlaps.sum(0)
        mA_history['real_neg'] += (1 - attrs).sum(0)

        outputs = outputs.astype(bool)
        attrs = attrs.astype(bool)

        # compute instance-based accuracy
        # NOTE(review): the divisors below are zero for a sample with no
        # predicted/true attributes, which yields NaN — presumably the
        # datasets guarantee at least one positive attribute per sample;
        # confirm before reusing on other data.
        intersect = (outputs & attrs).astype(float)
        union = (outputs | attrs).astype(float)
        ins_acc += (intersect.sum(1) / union.sum(1)).sum()
        ins_prec += (intersect.sum(1) / outputs.astype(float).sum(1)).sum()
        ins_rec += (intersect.sum(1) / attrs.astype(float).sum(1)).sum()

        num_persons += imgs.size(0)

        if (batch_idx + 1) % args.print_freq == 0:
            print('Processed batch {}/{}'.format(batch_idx + 1, len(testloader)))

        if args.save_prediction:
            # fix: use a context manager so the file handle is closed even
            # if a write raises mid-batch (was a bare open()/close() pair)
            with open(osp.join(args.save_dir, 'prediction.txt'), 'a') as txtfile:
                for idx in range(imgs.size(0)):
                    img_path = img_paths[idx]
                    probs = orig_outputs[idx, :]
                    labels = attrs[idx, :]
                    txtfile.write('{}\n'.format(img_path))
                    txtfile.write('*** Correct prediction ***\n')
                    for attr_idx, (label, prob) in enumerate(zip(labels, probs)):
                        if label:
                            attr_name = attr_dict[attr_idx]
                            info = '{}: {:.1%} '.format(attr_name, prob)
                            txtfile.write(info)
                    txtfile.write('\n*** Incorrect prediction ***\n')
                    for attr_idx, (label, prob) in enumerate(zip(labels, probs)):
                        if not label and prob > 0.5:
                            attr_name = attr_dict[attr_idx]
                            info = '{}: {:.1%} '.format(attr_name, prob)
                            txtfile.write(info)
                    txtfile.write('\n\n')

    print('=> BatchTime(s)/BatchSize(img): {:.4f}/{}'.format(
        batch_time.avg, args.batch_size))

    # normalize the instance-based accumulators to per-person averages
    ins_acc /= num_persons
    ins_prec /= num_persons
    ins_rec /= num_persons
    ins_f1 = (2 * ins_prec * ins_rec) / (ins_prec + ins_rec)

    # label-based mA: mean of per-attribute positive and negative recall
    term1 = mA_history['correct_pos'] / mA_history['real_pos']
    term2 = mA_history['correct_neg'] / mA_history['real_neg']
    label_mA_verbose = (term1 + term2) * 0.5
    label_mA = label_mA_verbose.mean()

    print('* Results *')
    print(' # test persons: {}'.format(num_persons))
    print(' (instance-based) accuracy: {:.1%}'.format(ins_acc))
    print(' (instance-based) precition: {:.1%}'.format(ins_prec))
    print(' (instance-based) recall: {:.1%}'.format(ins_rec))
    print(' (instance-based) f1-score: {:.1%}'.format(ins_f1))
    print(' (label-based) mean accuracy: {:.1%}'.format(label_mA))
    print(' mA for each attribute: {}'.format(label_mA_verbose))
    return label_mA, ins_acc, ins_prec, ins_rec, ins_f1
def train(self, epoch, trainloader, fixbase=False, open_layers=None, print_freq=10):
    """Trains the model for one epoch on source datasets using a
    multi-similarity (MS) style loss.

    The criterion is expected to return ``(loss, p_num, n_num)`` where
    ``p_num``/``n_num`` count the positive/negative pairs used; the
    backward/step is skipped when the loss is zero (presumably no
    informative pairs in the batch — confirm with the criterion).

    Args:
        epoch (int): current epoch (zero-based).
        trainloader (Dataloader): training dataloader.
        fixbase (bool, optional): whether to fix base layers. Default is
            False. Also suppresses the scheduler step for this epoch.
        open_layers (str or list, optional): layers open for training.
        print_freq (int, optional): print frequency. Default is 10.
    """
    losses = AverageMeter()
    accs = AverageMeter()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    p_nums = AverageMeter()
    n_nums = AverageMeter()

    self.model.train()
    if fixbase and (open_layers is not None):
        open_specified_layers(self.model, open_layers)
    else:
        open_all_layers(self.model)

    end = time.time()
    # NOTE: with the identity sampler each batch groups several images per
    # pid (e.g. 4 instances each of 8 identities), which the pair-based
    # loss relies on.
    for batch_idx, data in enumerate(trainloader):
        data_time.update(time.time() - end)

        imgs, pids = self._parse_data_for_train(data)
        if self.use_gpu:
            imgs = imgs.cuda()
            pids = pids.cuda()

        self.optimizer.zero_grad()
        outputs = self.model(imgs)
        loss, p_num, n_num = self._compute_loss(self.criterion, outputs, pids)
        # only step when the loss is non-zero; a zero loss has no gradient
        # worth applying
        if loss.item() > 0:
            loss.backward()
            self.optimizer.step()

        batch_time.update(time.time() - end)

        losses.update(loss.item(), pids.size(0))
        p_nums.update(p_num)
        n_nums.update(n_num)
        accs.update(metrics.accuracy(outputs, pids)[0].item())

        if (batch_idx + 1) % print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'MS Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'P-num {p.val:.2f} ({p.avg:.2f})\t'
                  'N-num {n.val:.2f} ({n.avg:.2f})\t'
                  'Acc {acc.val:.2f} ({acc.avg:.2f})\t'.format(
                      epoch + 1, batch_idx + 1, len(trainloader),
                      batch_time=batch_time,
                      data_time=data_time,
                      loss=losses,
                      p=p_nums,
                      n=n_nums,
                      acc=accs))

        end = time.time()

    # scheduler is held back while the base is frozen
    if (self.scheduler is not None) and (not fixbase):
        self.scheduler.step()
def _evaluate(self, epoch, dataset_name='', queryloader=None, galleryloader=None,
              dist_metric='euclidean', normalize_feature=False, visrank=False,
              visrank_topk=20, save_dir='', use_metric_cuhk03=False,
              ranks=[1, 5, 10, 20], rerank=False, load_pose=False, part_score=False):
    """Extract query/gallery features and report CMC ranks and mAP.

    Supports pose-conditioned feature extraction (``load_pose``) and
    part-score-weighted distances (``part_score``); with ``part_score``
    the extractor returns ``(features, score)`` and distances are computed
    via ``compute_weight_distance_matrix``.

    Args:
        epoch (int): current epoch (used only to name the visrank folder).
        dataset_name (str): dataset key for visualization lookups.
        queryloader / galleryloader: eval dataloaders.
        dist_metric (str): metric passed to the distance-matrix helpers.
        normalize_feature (bool): L2-normalize features before distances.
        visrank (bool): save ranked-result visualizations to ``save_dir``.
        visrank_topk (int): number of ranks to visualize.
        use_metric_cuhk03 (bool): use the CUHK03-specific CMC protocol.
        ranks (list): CMC ranks to print.
        rerank (bool): apply k-reciprocal person re-ranking.
        load_pose (bool): eval batches additionally carry pose data.
        part_score (bool): extractor also returns per-part scores.

    Returns:
        float: rank-1 CMC accuracy.
    """
    batch_time = AverageMeter()
    self.model.eval()

    print('Extracting features from query set ...')
    qf, q_pids, q_camids = [], [], []  # query features, query person IDs and query camera IDs
    q_score = []
    for batch_idx, data in enumerate(queryloader):
        if load_pose:
            imgs, pids, camids, pose = self._parse_data_for_eval(data)
        else:
            imgs, pids, camids = self._parse_data_for_eval(data)
        if self.use_gpu:
            imgs = imgs.cuda()
        end = time.time()
        if load_pose:
            if part_score:
                features, score = self._extract_features(imgs, pose)
                score = score.data.cpu()
                q_score.append(score)
            else:
                features = self._extract_features(imgs, pose)
        else:
            features = self._extract_features(imgs)
        # timing covers only the forward pass, not data loading
        batch_time.update(time.time() - end)
        features = features.data.cpu()
        qf.append(features)
        q_pids.extend(pids)
        q_camids.extend(camids)
    qf = torch.cat(qf, 0)
    q_pids = np.asarray(q_pids)
    q_camids = np.asarray(q_camids)
    if part_score:
        q_score = torch.cat(q_score)
    print('Done, obtained {}-by-{} matrix'.format(qf.size(0), qf.size(1)))

    print('Extracting features from gallery set ...')
    gf, g_pids, g_camids = [], [], []  # gallery features, gallery person IDs and gallery camera IDs
    g_score = []
    end = time.time()
    for batch_idx, data in enumerate(galleryloader):
        if load_pose:
            imgs, pids, camids, pose = self._parse_data_for_eval(data)
        else:
            imgs, pids, camids = self._parse_data_for_eval(data)
        if self.use_gpu:
            imgs = imgs.cuda()
        end = time.time()
        if load_pose:
            if part_score:
                features, score = self._extract_features(imgs, pose)
                score = score.data.cpu()
                g_score.append(score)
            else:
                features = self._extract_features(imgs, pose)
        else:
            features = self._extract_features(imgs)
        batch_time.update(time.time() - end)
        features = features.data.cpu()
        gf.append(features)
        g_pids.extend(pids)
        g_camids.extend(camids)
    gf = torch.cat(gf, 0)
    g_pids = np.asarray(g_pids)
    g_camids = np.asarray(g_camids)
    if part_score:
        g_score = torch.cat(g_score)
    print('Done, obtained {}-by-{} matrix'.format(gf.size(0), gf.size(1)))

    print('Speed: {:.4f} sec/batch'.format(batch_time.avg))

    if normalize_feature:
        print('Normalzing features with L2 norm ...')
        qf = F.normalize(qf, p=2, dim=1)
        gf = F.normalize(gf, p=2, dim=1)

    print(
        'Computing distance matrix with metric={} ...'.format(dist_metric))
    if part_score:
        # weight per-part distances by the extracted part scores
        distmat = metrics.compute_weight_distance_matrix(
            qf, gf, q_score, g_score, dist_metric)
    else:
        distmat = metrics.compute_distance_matrix(qf, gf, dist_metric)
    distmat = distmat.numpy()

    if rerank:
        print('Applying person re-ranking ...')
        distmat_qq = metrics.compute_distance_matrix(qf, qf, dist_metric)
        distmat_gg = metrics.compute_distance_matrix(gf, gf, dist_metric)
        distmat = re_ranking(distmat, distmat_qq, distmat_gg)

    print('Computing CMC and mAP ...')
    cmc, mAP = metrics.evaluate_rank(distmat, q_pids, g_pids, q_camids,
                                     g_camids,
                                     use_metric_cuhk03=use_metric_cuhk03)

    print('** Results **')
    print('mAP: {:.1%}'.format(mAP))
    print('CMC curve')
    for r in ranks:
        print('Rank-{:<3}: {:.1%}'.format(r, cmc[r - 1]))

    if visrank:
        visualize_ranked_results(
            distmat,
            self.datamanager.return_testdataset_by_name(dataset_name),
            save_dir=osp.join(save_dir, 'visrank-' + str(epoch + 1),
                              dataset_name),
            topk=visrank_topk)

    return cmc[0]