def evaluate(model, criterion, eval_dataloader, is_aux=False):
    """Run one evaluation pass over ``eval_dataloader``.

    Args:
        model: segmentation network on GPU; returns ``(aux1, aux2, main)``
            when ``is_aux`` is truthy, otherwise only the main logits.
        criterion: loss applied to the main output and the labels.
        eval_dataloader: yields ``(image, label)`` batches.
        is_aux: whether the model emits two auxiliary heads.

    Returns:
        ``(eval_loss, eval_acc, eval_mean_iu)`` — metric *sums* over all
        batches/samples; callers must divide by the dataset size themselves.
        ``eval_acc_cls``/``eval_fwavacc`` are accumulated but not returned.
    """
    model.eval()
    eval_loss = 0
    eval_acc = 0
    eval_acc_cls = 0
    eval_mean_iu = 0
    eval_fwavacc = 0
    for val_img, val_label in eval_dataloader:
        val_img = val_img.cuda()
        val_label = val_label.cuda()
        # forward; auxiliary outputs are discarded for evaluation
        # fixed: truthiness test instead of `is True`, which rejected
        # truthy non-bool flags (e.g. 1)
        if is_aux:
            aux_out1, aux_out2, main_out = model(val_img)
        else:
            main_out = model(val_img)
        val_loss = criterion(main_out, val_label)
        eval_loss += val_loss.item()
        label_pred = main_out.max(dim=1)[1].data.cpu().numpy()
        label_true = val_label.data.cpu().numpy()
        # per-sample metrics, summed over the whole pass
        for lbt, lbp in zip(label_true, label_pred):
            acc, acc_cls, mean_iu, fwavacc = label_accuracy_score(
                lbt, lbp, config.num_class)
            eval_acc += acc
            eval_acc_cls += acc_cls
            eval_mean_iu += mean_iu
            eval_fwavacc += fwavacc
    return eval_loss, eval_acc, eval_mean_iu
def train_epoch(self):
    """Train the seenmask branch for one epoch over ``self.train_loader``.

    Per batch: forward via ``self.forward``, backprop, log metrics to a CSV
    file and to tensorboard, and advance ``self.iteration``.
    """
    self.model.train()
    for batch_idx, (data, target) in enumerate(self.train_loader):
        # self.forward is expected to return (logits, loss, preds, truths)
        score, loss, lbl_pred, lbl_true = self.forward(data, target)
        self.optim.zero_grad()
        loss.backward()
        self.optim.step()
        # NOTE(review): `loss.data[0]` / `.data[0]` is the pre-0.4 PyTorch
        # scalar API; on modern PyTorch this raises — confirm the pinned
        # torch version, otherwise these should be `.item()`.
        print("Seenmask Train Epoch {:<5} | Iteration {:<5} | Loss {:5.5f} | seenmask_score grad sum {:7.8f} | seenmask_upscore grad sum {:7.8f} | score sum {:10.5f}".format(
            int(self.epoch), int(batch_idx), float(loss.data[0]),
            float(self.model.seenmask_score.weight.grad.sum().data[0]),
            float(self.model.seenmask_upscore.weight.grad.sum().data[0]),
            float(score.sum().data[0])))
        # batch-level (acc, acc_cls, mean_iu, fwavacc)
        metrics = utils.label_accuracy_score(lbl_true.numpy(), lbl_pred, self.n_class)
        with open(osp.join(self.log_dir, 'seenmask_train_log.csv'), 'a') as f:
            elapsed_time = (datetime.datetime.now(pytz.timezone('US/Eastern')) -
                            self.timestamp_start).total_seconds()
            log = [self.epoch, self.iteration] + [loss.data[0]] + list(metrics) + [elapsed_time]
            log = map(str, log)
            f.write(','.join(log) + '\n')
        # write to tensorboard
        self.tb_writer.add_scalar('seenmask/train/loss', loss.data[0], self.iteration)
        self.tb_writer.add_scalar('seenmask/train/pxl_acc', metrics[0], self.iteration)
        self.tb_writer.add_scalar('seenmask/train/class_acc', metrics[1], self.iteration)
        self.tb_writer.add_scalar('seenmask/train/mean_iu', metrics[2], self.iteration)
        self.tb_writer.add_scalar('seenmask/train/fwavacc', metrics[3], self.iteration)
        self.iteration += 1
def validation(epoch, model, data_loader, criterion, device):
    """Evaluate ``model`` on one fold's validation loader.

    Returns the mean of all per-batch metrics returned by
    ``label_accuracy_score`` accumulated into ``mIoU_list``.
    """
    # NOTE(review): `fold` is a free (module-level) name, not a parameter —
    # confirm it is defined wherever this function is used.
    print('Start validation fold{} #{}'.format(fold + 1, epoch))
    model.eval()
    with torch.no_grad():
        total_loss = 0
        cnt = 0
        mIoU_list = []
        for step, (images, masks, _) in enumerate(data_loader):
            images = torch.stack(images)          # (batch, channel, height, width)
            masks = torch.stack(masks).long()     # (batch, height, width) class ids
            images, masks = images.to(device), masks.to(device)
            outputs = model(images)
            loss = criterion(outputs, masks)
            # fixed: accumulate a Python float instead of keeping a CUDA
            # tensor alive across the whole loop
            total_loss += loss.item()
            cnt += 1
            outputs = torch.argmax(outputs.squeeze(), dim=1).detach().cpu().numpy()
            mIoUs = label_accuracy_score(
                masks.detach().cpu().numpy(), outputs, n_class=12)
            mIoU_list += mIoUs
        avrg_loss = total_loss / cnt
        print('Validation {} #{} Average Loss: {:.4f}, mIoU: {:.4f}'.format(
            fold + 1, epoch, avrg_loss, np.mean(mIoU_list)))
    return np.mean(mIoU_list)
def _get_loss_gen(self):
    """Generator loss: segmentation cross-entropy plus weighted adversarial
    term (fool the discriminator into predicting "real" == 1).

    Also reports per-batch mean (acc, acc_cls, iu, fwavacc) via
    ``chainer.report``. Returns the combined loss.
    """
    batchsize = self.y_fake.data.shape[0]
    # multi-class segmentation loss on the predicted label map
    L_mce = F.softmax_cross_entropy(self.pred_label_map, self.ground_truth, normalize=False)
    # adversarial loss: target label 1 ("real") for every fake sample
    # NOTE(review): `volatile=` is the pre-chainer-2 API — confirm version.
    L_bce = F.softmax_cross_entropy(
        self.y_fake,
        Variable(self.xp.ones(batchsize, dtype=self.xp.int32),
                 volatile=not self.gen.train))
    loss = L_mce + self.L_bce_weight * L_bce
    # log report
    label_true = chainer.cuda.to_cpu(self.ground_truth.data)
    label_pred = chainer.cuda.to_cpu(self.pred_label_map.data).argmax(axis=1)
    logs = []
    for i in six.moves.range(batchsize):
        acc, acc_cls, iu, fwavacc = utils.label_accuracy_score(
            label_true[i], label_pred[i], self.n_class)
        logs.append((acc, acc_cls, iu, fwavacc))
    # mean over the batch
    log = np.array(logs).mean(axis=0)
    values = {
        'loss': loss,
        'accuracy': log[0],
        'accuracy_cls': log[1],
        'iu': log[2],
        'fwavacc': log[3],
    }
    chainer.report(values, self.gen)
    return loss
def val(net, val_loader, viz, criterion, iteration=None):
    """Evaluate ``net`` on the validation loader.

    Args:
        net: segmentation network (moved to cuda:0 implicitly by the data).
        val_loader: validation DataLoader.
        viz: visualization handle (unused here, kept for interface compat).
        criterion: loss function.
        iteration: current training iteration, for the progress bar label.

    Returns:
        ``(loss, acc, acc_cls, mean_iu, fwavacc)`` where loss is the mean
        batch loss.
    """
    was_training = net.training  # remember caller's mode so we can restore it
    device = torch.device("cuda:0")
    net.eval()
    val_true_win = 'val_true_image'
    label_name = val_loader.dataset.class_names
    val_loss = 0.0
    label_trues = []
    label_preds = []
    for batch_idx, (data, target) in tqdm.tqdm(
            enumerate(val_loader, 1), total=len(val_loader),
            desc='Validation iteration {}'.format(iteration),
            ncols=80, leave=False):
        data = data.to(device)
        target = target.to(device)
        # fixed: run the forward pass inside the no-grad context too — the
        # original only wrapped the loss, so evaluation still built an
        # autograd graph for every batch
        with torch.set_grad_enabled(False):
            scores = net(data)
            loss = criterion(scores, target)
        val_loss += loss.item()
        imgs = data.detach().cpu().numpy()
        # argmax over the class dimension -> predicted label map
        lbl_pred = scores.max(1)[1].cpu().numpy()[:, :, :]
        lbl_true = target.detach().cpu().numpy()
        label_trues.append(lbl_true)
        label_preds.append(lbl_pred)
    acc, acc_cls, mean_iu, fwavacc = utils.label_accuracy_score(
        label_trues, label_preds, 21)
    loss = val_loss / len(val_loader)
    # fixed: restore the mode captured on entry; the original saved
    # `net.training` but then unconditionally forced train mode
    net.train(was_training)
    return loss, acc, acc_cls, mean_iu, fwavacc
def val(model, dataloader):
    # Evaluate a road-segmentation model (Python 2 syntax: print statements).
    # Accumulates per-sample pixel accuracy / mean IoU sums and collects raw
    # predictions for the MaxF metric computed by eval_road.
    # Returns the MaxF score; prints ACC and mean IoU averaged per sample.
    model.eval()
    eval_acc = 0
    eval_acc_cls = 0
    eval_mean_iu = 0
    eval_fwavacc = 0
    li_pred = []
    li_gt = []
    for i, (_, im, cloud, theta, shift, lb) in enumerate(dataloader):
        # legacy pre-0.4 PyTorch: wrap tensors in Variable, move to GPU
        im, cloud, theta, shift, lb = Variable(im), Variable(cloud), Variable(theta), Variable(shift), Variable(lb)
        im, cloud, theta, shift, lb = im.float().cuda(), cloud.float().cuda(), theta.float().cuda(), shift.float().cuda(), lb.long().cuda()
        _, pred = model(im, cloud, theta, shift)
        # Mean IoU: binarize the probability map at 0.5 (2-class road / not-road)
        label_true = lb.data.cpu().numpy().astype(np.int8)
        label_pred = pred.data.cpu().numpy().squeeze(0)
        label_pred = (label_pred > 0.5).astype(np.int8)
        for (label, prob) in zip(label_true, label_pred):
            acc, acc_cls, mean_iu, fwavacc = label_accuracy_score(label, prob, n_class=2)
            eval_acc += acc
            eval_acc_cls += acc_cls
            eval_mean_iu += mean_iu
            eval_fwavacc += fwavacc
        # MaxF: keep the *soft* predictions for threshold sweeping in eval_road
        label_pred = pred.data.cpu().numpy().squeeze()
        label_true = lb.data.cpu().numpy().squeeze()
        li_pred.append(label_pred)
        li_gt.append(label_true)
    print 'Validation ======ACC: %lf,Mean IoU: %lf======' % (eval_acc/dataloader.__len__(), eval_mean_iu/dataloader.__len__())
    max_f = eval_road(li_pred, li_gt)
    # NOTE(review): forces train mode on exit regardless of the caller's mode
    model.train()
    return max_f
def train_epoch(self):
    """Train for one epoch with a step LR decay (x0.1 every 20 epochs).

    Supports resuming mid-epoch via ``self.iteration`` and triggers
    ``self.validate()`` every ``self.interval_validate`` iterations.
    """
    self.model.train()
    # decay the learning rate by 10x every 20 epochs (skipping epoch 0)
    if self.epoch % 20 == 0 and self.epoch != 0:
        for param_group in self.optim.param_groups:
            param_group['lr'] = self.initlr * 0.1
        self.initlr = self.initlr * 0.1
        print('current learning rate is ')
        print(self.initlr)
    n_class = len(self.train_loader.dataset.class_names)
    for batch_idx, (data, target) in tqdm.tqdm(
            enumerate(self.train_loader), total=len(self.train_loader),
            desc='Train epoch=%d' % self.epoch, ncols=80, leave=False):
        iteration = batch_idx + self.epoch * len(self.train_loader)
        # skip already-seen iterations when resuming from a checkpoint
        if self.iteration != 0 and (iteration - 1) != self.iteration:
            continue  # for resuming
        self.iteration = iteration
        if self.iteration % self.interval_validate == 0:
            self.validate()
        if self.cuda:
            data, target = data.cuda(), target.cuda()
        # legacy pre-0.4 PyTorch Variable wrapper
        data, target = Variable(data), Variable(target)
        self.optim.zero_grad()
        score = self.model(data)
        loss = cross_entropy2d(score, target, size_average=self.size_average)
        loss.backward()
        self.optim.step()
        # per-sample metrics averaged over the batch
        metrics = []
        lbl_pred = score.data.max(1)[1].cpu().numpy()[:, :, :]
        lbl_true = target.data.cpu().numpy()
        for lt, lp in zip(lbl_true, lbl_pred):
            acc, acc_cls, mean_iu, fwavacc = \
                utils.label_accuracy_score(
                    [lt], [lp], n_class=n_class)
            metrics.append((acc, acc_cls, mean_iu, fwavacc))
        metrics = np.mean(metrics, axis=0)
        with open(osp.join(self.out, 'log.csv'), 'a') as f:
            elapsed_time = (
                datetime.datetime.now(pytz.timezone('Asia/Shanghai')) -
                self.timestamp_start).total_seconds()
            # NOTE(review): `loss.data[0]` is the pre-0.4 PyTorch scalar
            # API; on modern PyTorch this raises — confirm torch version.
            log = [self.epoch, self.iteration] + [loss.data[0]] + \
                metrics.tolist() + [''] * 5 + [elapsed_time]
            log = map(str, log)
            f.write(','.join(log) + '\n')
        if self.iteration >= self.max_iter:
            break
def eval_on_validation():
    # Evaluate a trained road-segmentation model on the KITTI validation
    # split (Python 2 syntax). Saves prediction images (disabled), reports
    # pixel accuracy, mean IoU, average inference time, and MaxF via
    # eval_road.
    if not os.path.exists(config.run_dir):
        os.mkdir(config.run_dir)
    model = getattr(models, config.model)()
    model = torch.nn.DataParallel(model)  # multi-gpu
    model.cuda()
    print 'test on validation set.', config.model
    print model
    if config.load_model_path:
        model.load_state_dict(torch.load(config.load_model_path))
    # data
    test_data = KITTIRoadFusion(config.root, split='val', num_features=19)
    test_dataloader = DataLoader(test_data, batch_size=1, shuffle=False, num_workers=4)
    # test
    model.eval()
    eval_acc = 0
    eval_acc_cls = 0
    eval_mean_iu = 0
    eval_fwavacc = 0
    li_pred = []
    li_gt = []
    total_time = 0
    for i, (name, im, cloud, theta, shift, lb) in enumerate(test_dataloader):
        # legacy pre-0.4 PyTorch: Variable wrappers + explicit GPU transfer
        im, cloud, theta, shift, lb = Variable(im), Variable(cloud), Variable(theta), Variable(shift), Variable(lb)
        im, cloud, theta, shift, lb = im.float().cuda(), cloud.float().cuda(), theta.float().cuda(), shift.float().cuda(), lb.long().cuda()
        # time only the forward pass (time.clock is wall/CPU time, py2-era API)
        start = time.clock()
        _, pred = model(im, cloud, theta, shift)  # inference
        end = time.clock()
        total_time += (end - start)
        # pred = F.upsample_bilinear(pred, scale_factor=4)
        # save image (imwrite currently disabled)
        label_pred = pred.data.cpu().numpy().squeeze()
        label_pred = np.array(label_pred*255, dtype=np.uint8)
        filename = os.path.join(config.run_dir, name[0])
        print filename
        # cv2.imwrite(filename, label_pred)
        # Mean IoU: binarize at 0.5 (2-class)
        label_true = lb.data.cpu().numpy().astype(np.int8)
        label_pred = pred.data.cpu().numpy().squeeze(0)
        label_pred = (label_pred > 0.5).astype(np.int8)
        for (label, prob) in zip(label_true, label_pred):
            acc, acc_cls, mean_iu, fwavacc = label_accuracy_score(label, prob, n_class=2)
            eval_acc += acc
            eval_acc_cls += acc_cls
            eval_mean_iu += mean_iu
            eval_fwavacc += fwavacc
        # MaxF: keep soft predictions for threshold sweeping
        label_pred = pred.data.cpu().numpy().squeeze()
        label_true = lb.data.cpu().numpy().squeeze()
        li_pred.append(label_pred)
        li_gt.append(label_true)
    print 'Runtime ############# time(s) : %f ##########' % (total_time / test_dataloader.__len__())
    print 'Validation ======ACC: %lf,Mean IoU: %lf======' % \
        (eval_acc / test_dataloader.__len__(), eval_mean_iu / test_dataloader.__len__())
    eval_road(li_pred, li_gt)
def train(num_epochs, model, data_loader, val_loader, val_every, device, file_name):
    """Train a 12-class segmentation model with AdamP wrapped in Lookahead,
    a SoftCE+Jaccard compound loss, and SWA averaging after epoch 3.

    Saves the model (via the free function ``save_model``) whenever
    validation mIoU improves.
    """
    learning_rate = 0.0001
    # local imports: third-party training utilities
    from torch.optim.swa_utils import AveragedModel, SWALR
    from torch.optim.lr_scheduler import CosineAnnealingLR
    from segmentation_models_pytorch.losses import SoftCrossEntropyLoss, JaccardLoss
    from adamp import AdamP
    # compound loss: label-smoothed CE + Jaccard, summed per batch
    criterion = [SoftCrossEntropyLoss(smooth_factor=0.1), JaccardLoss('multiclass', classes=12)]
    optimizer = AdamP(params=model.parameters(), lr=learning_rate, weight_decay=1e-6)
    swa_scheduler = SWALR(optimizer, swa_lr=learning_rate)
    swa_model = AveragedModel(model)
    # NOTE(review): `Lookahead` and `saved_dir` are free names from the
    # enclosing module — confirm they are defined/imported there.
    look = Lookahead(optimizer, la_alpha=0.5)
    print('Start training..')
    best_miou = 0
    for epoch in range(num_epochs):
        hist = np.zeros((12, 12))
        model.train()
        for step, (images, masks, _) in enumerate(data_loader):
            loss = 0
            images = torch.stack(images)           # (batch, channel, height, width)
            masks = torch.stack(masks).long()      # (batch, height, width) class ids
            # move to the compute device
            images, masks = images.to(device), masks.to(device)
            # inference
            outputs = model(images)
            # sum each component of the compound loss
            for i in criterion:
                loss += i(outputs, masks)
            look.zero_grad()
            loss.backward()
            look.step()
            outputs = torch.argmax(outputs.squeeze(), dim=1).detach().cpu().numpy()
            # accumulate confusion matrix across the epoch
            hist = add_hist(hist, masks.detach().cpu().numpy(), outputs, n_class=12)
            acc, acc_cls, mIoU, fwavacc = label_accuracy_score(hist)
            # print loss / mIoU every 25 steps
            if (step + 1) % 25 == 0:
                print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}, mIoU: {:.4f}'.format(
                    epoch + 1, num_epochs, step + 1, len(data_loader), loss.item(), mIoU))
        # periodic validation; save best model by mIoU
        if (epoch + 1) % val_every == 0:
            avrg_loss, val_miou = validation(epoch + 1, model, val_loader, criterion, device)
            if val_miou > best_miou:
                print('Best performance at epoch: {}'.format(epoch + 1))
                print('Save model in', saved_dir)
                best_miou = val_miou
                save_model(model, file_name=file_name)
        # start accumulating the SWA average after a warmup of 4 epochs
        if epoch > 3:
            swa_model.update_parameters(model)
            swa_scheduler.step()
def func_eval(model, criterion, val_dataset, val_loader, post_crf=False):
    """Evaluate a 12-class trash-segmentation model on the validation set.

    Args:
        model: segmentation network on ``CFG.device``.
        criterion: loss applied to the logits.
        val_dataset: dataset object (used only for ``len`` to average loss).
        val_loader: yields ``(images, masks, img_info)`` batches.
        post_crf: if True, refine probabilities with dense CRF in a
            multiprocessing pool.

    Returns:
        ``(val_loss, acc, mIoU, mIoU_df, fig_mask)`` where ``mIoU_df`` is a
        per-class IoU DataFrame and ``fig_mask`` is the figure logged from
        the first batch (``None`` if the loader is empty).
    """
    print("Start validation.\n")
    model.eval()  # make model evaluation mode
    with torch.no_grad():
        n_class = 12
        total_loss_sum = 0
        hist = np.zeros((n_class, n_class))  # confusion matrix
        # fixed: initialize so the final return cannot raise NameError when
        # the loader yields no batches
        fig_mask = None
        for step, (images, masks, img_info) in enumerate(val_loader):
            images = torch.stack(images).to(
                CFG.device)  # (batch, channel, height, width)
            masks = torch.stack(masks).long().to(
                CFG.device)  # (batch, height, width)
            # forward pass (get logits)
            logits = model(images)
            # weight batch loss by its size so the final mean is per-sample
            loss = criterion(logits, masks)
            total_loss_sum += loss.item() * images.shape[0]
            # softmax to get per-class probabilities
            probs = F.softmax(logits, dim=1)
            probs = probs.data.cpu().numpy()
            # optional dense-CRF postprocessing, parallelized per image
            if post_crf:
                pool = mp.Pool(mp.cpu_count())
                images = images.data.cpu().numpy().astype(np.uint8).transpose(
                    0, 2, 3, 1)
                probs = pool.map(dense_crf_wrapper, zip(images, probs))
                pool.close()
            # class with highest probability
            preds = np.argmax(probs, axis=1)
            masks = masks.detach().cpu().numpy()
            hist = add_hist(hist, masks, preds, n_class=n_class)
            if step == 0:
                fig_mask = log_images(masks, preds, img_info)
            del images, masks, logits, probs, preds
        val_loss = total_loss_sum / len(val_dataset)
        acc, acc_cls, mIoU, iu, fwavacc = label_accuracy_score(hist)
        recycle = [
            'Background', 'UNKNOWN', 'General trash', 'Paper', 'Paper pack',
            'Metal', 'Glass', 'Plastic', 'Styrofoam', 'Plastic bag',
            'Battery', 'Clothing'
        ]
        mIoU_df = pd.DataFrame({'Recycle Type': recycle, 'IoU': iu})
    return val_loss, acc, mIoU, mIoU_df, fig_mask
def validate(self):
    """Run validation, print the four segmentation metrics, and return
    ``(label_trues, label_preds, n_class)``.

    Restores the model's train/eval mode on exit.
    """
    training = self.model.training  # remember mode so we can restore it
    self.model.eval()
    n_class = len(self.val_loader.dataset.class_names)
    val_loss = 0
    visualizations = []
    label_trues, label_preds = [], []
    with torch.no_grad():
        for batch_idx, (data, target) in tqdm.tqdm(
                enumerate(self.val_loader), total=len(self.val_loader),
                desc='Valid iteration=%d' % self.iteration,
                ncols=80, leave=False):
            if self.cuda:
                data, target = data.to('cuda'), target.to('cuda')
            score = self.model(data)
            loss = cross_entropy2d(score, target, size_average=self.size_average)
            if np.isnan(float(loss.item())):
                raise ValueError('loss is nan while validating')
            val_loss += float(loss.item()) / len(data)
            imgs = data.data.cpu()
            lbl_pred = score.data.max(1)[1].cpu().numpy()[:, :, :]
            lbl_true = target.data.cpu()
            for img, lt, lp in zip(imgs, lbl_true, lbl_pred):
                img, lt = self.val_loader.dataset.untransform(img, lt)
                label_trues.append(lt)
                label_preds.append(lp)
                # keep at most 9 tiles for the visualization grid
                if len(visualizations) < 9:
                    viz = fcn.utils.visualize_segmentation(
                        lbl_pred=lp, lbl_true=lt, img=img, n_class=n_class)
                    visualizations.append(viz)
    metrics = utils.label_accuracy_score(label_trues, label_preds, n_class)
    # removed: debug prints that dumped the full label arrays to stdout
    val_loss /= len(self.val_loader)
    print('acc, acc_cls, mean_iu, fwavacc')
    print('acc:', metrics[0])
    print('acc_cls:', metrics[1])
    print('mean_iu:', metrics[2])
    print('fwavacc:', metrics[3])
    # fixed: restore the mode captured on entry; the original saved
    # `training` but never used it, leaving the model stuck in eval mode
    if training:
        self.model.train()
    return label_trues, label_preds, n_class
def train_epoch(self):
    """Train for one epoch, validating every ``self.interval_validate``
    iterations and appending per-batch metrics to ``log.csv``.

    Supports resuming mid-epoch via ``self.iteration``; stops once
    ``self.max_iter`` is reached.
    """
    self.model.train()
    n_class = len(self.train_loader.dataset.class_names)
    for batch_idx, (data, target) in tqdm.tqdm(
            enumerate(self.train_loader), total=len(self.train_loader),
            desc='Train epoch=%d' % self.epoch, ncols=80, leave=False):
        iteration = batch_idx + self.epoch * len(self.train_loader)
        # skip already-processed iterations when resuming from a checkpoint
        if self.iteration != 0 and (iteration - 1) != self.iteration:
            continue  # for resuming
        self.iteration = iteration
        if self.iteration % self.interval_validate == 0:
            self.validate()
            # validate() is expected to leave the model in train mode
            assert self.model.training
        if self.cuda:
            data, target = data.to('cuda'), target.to('cuda')
        self.optim.zero_grad()
        score = self.model(data)
        loss = cross_entropy2d(score, target, size_average=self.size_average)
        # normalize by batch size so logged losses are per-sample
        loss /= len(data)
        if np.isnan(float(loss.item())):
            raise ValueError('loss is nan while training')
        loss.backward()
        self.optim.step()
        metrics = []
        lbl_pred = score.data.max(1)[1].cpu().numpy()[:, :, :]
        lbl_true = target.data.cpu().numpy()
        acc, acc_cls, mean_iu, fwavacc = \
            utils.label_accuracy_score(
                lbl_true, lbl_pred, n_class=n_class)
        metrics.append((acc, acc_cls, mean_iu, fwavacc))
        metrics = np.mean(metrics, axis=0)
        with open(osp.join(self.out, 'log.csv'), 'a') as f:
            elapsed_time = (
                datetime.datetime.now(pytz.timezone('America/Bogota')) -
                self.timestamp_start).total_seconds()
            # train columns first, 5 empty validation columns after
            log = [self.epoch, self.iteration] + [loss.item()] + \
                metrics.tolist() + [''] * 5 + [elapsed_time]
            log = map(str, log)
            f.write(','.join(log) + '\n')
        if self.iteration >= self.max_iter:
            break
def eval_on_validation_bev():
    # Evaluate a trained road-segmentation model on the KITTI validation
    # split in bird's-eye-view space (Python 2 syntax). Predictions are
    # warped to BEV, written as images, then scored with pixel accuracy,
    # mean IoU, and MaxF via eval_road.
    if not os.path.exists(config.run_dir):
        os.mkdir(config.run_dir)
    model = getattr(models, config.model)()
    model = torch.nn.DataParallel(model)  # multi-gpu
    model.cuda()
    print 'test on validation set.', config.model
    print model
    if config.load_model_path:
        model.load_state_dict(torch.load(config.load_model_path))
    # data
    bev = BirdsEyeView()
    test_data = KITTIRoadFusion(config.root, split='val', num_features=19, return_bev=True)
    test_dataloader = DataLoader(test_data, batch_size=1, shuffle=False, num_workers=4)
    # test
    model.eval()
    eval_acc = 0
    eval_acc_cls = 0
    eval_mean_iu = 0
    eval_fwavacc = 0
    li_pred = []
    li_gt = []
    for i, (name, im, cloud, theta, shift, _, lb) in enumerate(test_dataloader):
        # legacy pre-0.4 PyTorch: Variable wrappers + explicit GPU transfer
        im, cloud, theta, shift, lb = Variable(im), Variable(cloud), Variable(theta), Variable(shift), Variable(lb)
        im, cloud, theta, shift, lb = im.float().cuda(), cloud.float().cuda(), theta.float().cuda(), shift.float().cuda(), lb.long().cuda()
        _, pred = model(im, cloud, theta, shift)  # inference
        pred = pred.data.cpu().numpy().squeeze()
        theta = theta.data.cpu().numpy().squeeze()
        shift = shift.data.cpu().numpy().squeeze()
        # warp the perspective prediction into bird's-eye view
        label_pred = bev.transformLable2BEV((pred*255).astype(np.uint8), theta, shift)
        label_true = lb.data.cpu().numpy().squeeze()
        # save image
        filename = os.path.join(config.run_dir, name[0])
        print filename
        cv2.imwrite(filename, label_pred)
        # back to [0, 1] probabilities for scoring
        label_pred = label_pred/255.
        # Mean IoU: binarize at 0.5, add a batch axis of 1
        label_true_hard = np.expand_dims(label_true.astype(np.int8), axis=0)
        label_pred_hard = np.expand_dims((label_pred > 0.5).astype(np.int8), axis=0)
        for (label, prob) in zip(label_true_hard, label_pred_hard):
            acc, acc_cls, mean_iu, fwavacc = label_accuracy_score(label, prob, n_class=2)
            eval_acc += acc
            eval_acc_cls += acc_cls
            eval_mean_iu += mean_iu
            eval_fwavacc += fwavacc
        # MaxF: keep soft predictions for threshold sweeping
        li_pred.append(label_pred)
        li_gt.append(label_true)
    print 'Validation ======ACC: %lf,Mean IoU: %lf======' % (eval_acc / test_dataloader.__len__(), eval_mean_iu / test_dataloader.__len__())
    eval_road(li_pred, li_gt)
def train(num_epochs, model, data_loader, val_loader, criterion, optimizer,
          saved_dir, val_every, device, file_name, n_class):
    """Train a segmentation model, logging to wandb and saving the best
    checkpoint (by validation mIoU) to ``saved_dir``.

    Args:
        num_epochs: number of epochs.
        model: segmentation network.
        data_loader: training DataLoader yielding ``(images, masks, _)``.
        val_loader: validation DataLoader.
        criterion, optimizer: loss and optimizer.
        saved_dir, file_name: checkpoint location.
        val_every: validate every N epochs.
        device: torch device.
        n_class: number of classes for the confusion matrix.
    """
    print('Start training..')
    best_mIoU = 0
    for epoch in range(num_epochs):
        hist = np.zeros((n_class, n_class))
        model.train()
        for step, (images, masks, _) in enumerate(data_loader):
            images = torch.stack(images)         # (batch, channel, height, width)
            masks = torch.stack(masks).long()    # (batch, height, width) class ids
            # move to the compute device
            images, masks = images.to(device), masks.to(device)
            # inference
            outputs = model(images)
            # cross-entropy loss
            loss = criterion(outputs, masks)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            outputs = torch.argmax(outputs.squeeze(), dim=1).detach().cpu().numpy()
            hist = add_hist(hist, masks.detach().cpu().numpy(), outputs, n_class=n_class)
            acc, acc_cls, mIoU, fwavacc = label_accuracy_score(hist)
            # log the scalar, not the live CUDA tensor
            wandb.log({"loss": loss.item(), "mIoU": mIoU})
            # print progress every 25 steps
            if (step + 1) % 25 == 0:
                # fixed: report against `data_loader`, the loader actually
                # iterated; `train_loader` was an undefined free name here
                print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}, mIoU:{:.4f}'.
                      format(epoch + 1, num_epochs, step + 1, len(data_loader),
                             loss.item(), mIoU))
        # periodic validation; keep the checkpoint with the best mIoU
        if (epoch + 1) % val_every == 0:
            avrg_loss, val_mIoU = validation(epoch + 1, model, val_loader,
                                             criterion, device, n_class)
            if val_mIoU > best_mIoU:
                print('Best performance at epoch: {}'.format(epoch + 1))
                print('Save model in', saved_dir)
                best_mIoU = val_mIoU
                save_model(model, saved_dir, file_name)
            wandb.log({
                "val_loss": avrg_loss,
                "val_mIoU": val_mIoU,
                "best_mIoU": best_mIoU
            })
def eval_metric(self, score, target, n_class):
    """Average (acc, acc_cls, mean_iu, fwavacc) over the samples of a batch.

    Args:
        score: logits of shape (batch, classes, H, W).
        target: ground-truth label maps of shape (batch, H, W).
        n_class: number of classes.

    Returns:
        numpy array of the four metrics, meaned across the batch.
    """
    preds = score.max(1)[1].cpu().numpy()[:, :, :]
    trues = target.cpu().numpy()
    per_sample = [
        utils.label_accuracy_score([gt], [pr], n_class=n_class)
        for gt, pr in zip(trues, preds)
    ]
    return np.mean(per_sample, axis=0)
def train(epoch_idx, net, train_loader, lr, logger, n_class):
    """Train ``net`` for one epoch with SGD and log metrics per batch.

    The pretrained backbone (``net.base_net``) gets a 10x smaller learning
    rate than the rest of the network. Every 10th epoch a checkpoint
    ``./deeplab_epoch_<n>.pth`` is written.

    Args:
        epoch_idx: zero-based epoch index.
        net: model exposing ``base_net``.
        train_loader: training DataLoader.
        lr: learning rate for the head; backbone uses ``lr * 0.1``.
        logger: object with ``scalar_summary(tag, value, step)``.
        n_class: number of segmentation classes.
    """
    net.cuda()
    net.train()
    # separate backbone params so they can use a reduced learning rate
    base_params = list(map(id, net.base_net.parameters()))
    top_params = filter(lambda p: id(p) not in base_params, net.parameters())
    optimizer = torch.optim.SGD(
        [{'params': top_params},
         {'params': net.base_net.parameters(), 'lr': lr * 0.1}],
        lr=lr, momentum=0.9, weight_decay=0.00004)
    criterion = nn.CrossEntropyLoss(ignore_index=-1)
    len_batch = len(train_loader)
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.cuda(), target.cuda()
        optimizer.zero_grad()
        score = net(data)
        loss = criterion(score, target)
        loss.backward()
        optimizer.step()
        _, predicted = score.max(1)
        predicted, target = to_np(predicted), to_np(target)
        acc, acc_cls, mean_iu = label_accuracy_score(target, predicted, n_class)
        info = {
            'acc': acc,
            'acc_cls': acc_cls,
            'mean_iu': mean_iu,
            # fixed: `loss.data[0]` is the removed pre-0.4 PyTorch scalar
            # API; `.item()` is the supported accessor
            'loss': loss.item()
        }
        for tag, value in info.items():
            logger.scalar_summary(tag, value, len_batch * epoch_idx + batch_idx + 1)
        print(('train', batch_idx, epoch_idx))
    if (epoch_idx + 1) % 10 == 0:
        # fixed: floor division so the filename is 'deeplab_epoch_1.pth',
        # not 'deeplab_epoch_1.0.pth' under Python 3 true division
        n = (epoch_idx + 1) // 10
        state = net.state_dict()
        torch.save(state, './deeplab_epoch_' + str(n) + '.pth')
def main():
    """Quick smoke-test evaluation: run an untrained Deeplab over the first
    six batches of VOC2012 train and print the metrics x100.

    Checkpoint loading and CUDA transfer are currently commented out, so
    the scores reflect random weights.
    """
    model = Deeplab()
    dataset = VOC2012ClassSeg('./dataset', split='train', transform=True)
    val_loader = torch.utils.data.DataLoader(dataset, batch_size=1,
                                             shuffle=False, num_workers=1,
                                             pin_memory=True)
    # n_class = len(dataset.class_names)
    # model_file = ''
    # moda_data = torch.load(model_file)
    # try:
    #     model.load_state_dict(model_data)
    # except Exception:
    #     model.load_state_dict(model_data['model_state_dict'])
    # if torch.cuda.is_available():
    #     model.cuda()
    model.eval()
    label_trues, label_preds = [], []
    for batch_idx, (data, target) in enumerate(val_loader):
        # if torch.cuda.is_available():
        #     data, target = data.cuda(), target.cuda()
        # NOTE(review): `volatile=True` is the pre-0.4 PyTorch API (a no-op
        # with a warning on modern versions) — confirm torch version.
        data, target = Variable(data, volatile=True), Variable(target)
        score = model(data)
        _, predicted = score.max(1)
        predicted = to_np(predicted)
        target = to_np(target)
        for lt, lp in zip(target, predicted):
            label_trues.append(lt)
            label_preds.append(lp)
        # only evaluate the first six batches
        if batch_idx == 5:
            break
    n_class = 21
    print(len(label_preds))
    metrics = label_accuracy_score(label_trues, label_preds, n_class=n_class)
    # report as percentages
    metrics = np.array(metrics)
    metrics *= 100
    print(metrics)
def validate(self):
    """Validate the GAN pipeline: repair defect images with the generator
    and, when enabled, score the segmentation head against the target masks.

    In debug mode, saves (defect, repair) image pairs under
    ``self.opt.val_save_path``.
    """
    self.netd.eval()
    self.netg.eval()
    self.nets.eval()
    progressbar = tqdm(self.valid_data_loader)
    for ii, (imgs, _) in enumerate(progressbar):
        normal, defect, target = imgs
        if self.opt.use_gpu:
            normal = normal.cuda()
            defect = defect.cuda()
            target = target.cuda()
        # generator reconstructs the defect-free image
        repair = self.netg(defect)
        if self.opt.with_segmentation:
            # segment from the concatenated (defect, repair) pair
            seg_input = torch.cat([defect, repair], dim=1)
            seg = self.nets(seg_input)
        else:
            seg = None
        if self.opt.with_segmentation:
            # binary segmentation metrics for this batch
            metrics = []
            lbl_pred = seg.data.max(1)[1].cpu().numpy()[:, :, :]
            lbl_true = target.data.cpu().numpy()
            acc, acc_cls, mean_iu, fwavacc = \
                label_accuracy_score(
                    lbl_true, lbl_pred, n_class=2)
            metrics.append((acc, acc_cls, mean_iu, fwavacc))
            metrics = np.mean(metrics, axis=0)
            progressbar.set_description(
                f'Acc: {metrics[0]:.5f}, Acc_cls: {metrics[1]:.5f}, MIU: {metrics[2]:.5f}, Fwavacc: {metrics[3]:.5f}'
            )
        if self.opt.debug:
            if not os.path.exists(self.opt.val_save_path):
                os.makedirs(self.opt.val_save_path)
            # save side-by-side defect/repair grid, de-normalized from [-1, 1]
            imgs = torch.cat((defect, repair), 0)
            tv.utils.save_image(imgs, os.path.join(
                self.opt.val_save_path,
                '{}_defect_repair.jpg'.format(ii)),
                normalize=True, range=(-1, 1))
def validation(epoch, model, data_loader, criterion, device, n_class):
    """Evaluate ``model`` and compute mIoU over the epoch-level confusion
    matrix (not averaged per batch).

    Returns:
        ``(avrg_loss, mIoU)`` — mean batch loss (float) and mean IoU.
    """
    print('Start validation #{}'.format(epoch))
    model.eval()
    with torch.no_grad():
        total_loss = 0
        cnt = 0
        hist = np.zeros((n_class, n_class))  # accumulated confusion matrix
        for step, (images, masks, _) in enumerate(data_loader):
            images = torch.stack(images)          # (batch, channel, height, width)
            masks = torch.stack(masks).long()     # (batch, height, width) class ids
            images, masks = images.to(device), masks.to(device)
            outputs = model(images)
            loss = criterion(outputs, masks)
            # fixed: accumulate a Python float instead of a CUDA tensor, so
            # the returned average loss is a plain number
            total_loss += loss.item()
            cnt += 1
            outputs = torch.argmax(outputs.squeeze(), dim=1).detach().cpu().numpy()
            # accumulate into the epoch-level confusion matrix
            hist = add_hist(hist, masks.detach().cpu().numpy(), outputs,
                            n_class=n_class)
        # mIoU computed once over the whole validation set
        acc, acc_cls, mIoU, fwavacc = label_accuracy_score(hist)
        avrg_loss = total_loss / cnt
        print('Validation #{} Average Loss: {:.4f}, mIoU: {:.4f}'.format(
            epoch, avrg_loss, mIoU))
    return avrg_loss, mIoU
def calc_loss(self):
    """Compute the segmentation cross-entropy into ``self.loss`` and report
    batch-mean (acc, acc_cls, iu, fwavacc) plus the loss via
    ``chainer.report``.
    """
    n_samples = self.ground_truth.shape[0]
    self.loss = F.softmax_cross_entropy(
        self.pred_label_map, self.ground_truth, normalize=False)
    # log report: score each sample, then average over the batch
    truths = chainer.cuda.to_cpu(self.ground_truth.data)
    preds = chainer.cuda.to_cpu(self.pred_label_map.data).argmax(axis=1)
    per_sample = [
        utils.label_accuracy_score(truths[idx], preds[idx], self.n_class)
        for idx in six.moves.range(n_samples)
    ]
    mean_log = np.array(per_sample).mean(axis=0)
    chainer.report({
        'loss': self.loss,
        'accuracy': mean_log[0],
        'accuracy_cls': mean_log[1],
        'iu': mean_log[2],
        'fwavacc': mean_log[3],
    }, self.model)
def psudo_labeling(num_epochs, model, data_loader, val_loader, unlabeled_loader,
                   criterion, optimizer, device, n_class, saved_dir, file_name,
                   val_every):
    """Semi-supervised training with pseudo labels.

    For each unlabeled batch: generate pseudo labels with the current model
    (eval mode), then train on them with an ``alpha_weight(step)``-scaled
    loss. Every 50 unlabeled batches, run one pass over the labeled
    ``data_loader``. Validates every ``val_every`` epochs and saves the best
    checkpoint by mIoU.
    """
    # Instead of using the current epoch we use a "step" variable to
    # calculate alpha_weight; this helps the model converge faster.
    step = 100
    best_mIoU = 0
    model.train()
    for epoch in range(num_epochs):
        hist = np.zeros((n_class, n_class))
        for batch_idx, (imgs, image_infos) in enumerate(unlabeled_loader):
            # --- forward pass on the unlabeled batch to get pseudo labels
            model.eval()
            outs = model(torch.stack(imgs).to(device))
            oms = torch.argmax(outs.squeeze(), dim=1).detach().cpu().numpy()
            oms = torch.Tensor(oms)
            oms = oms.long()
            oms = oms.to(device)
            # --- train on the pseudo labels
            model.train()
            imgs = torch.stack(imgs)
            imgs = imgs.to(device)
            output = model(imgs)
            unlabeled_loss = alpha_weight(step) * criterion(output, oms)
            # backpropagate
            optimizer.zero_grad()
            unlabeled_loss.backward()
            optimizer.step()
            output = torch.argmax(output.squeeze(), dim=1).detach().cpu().numpy()
            hist = add_hist(hist, oms.detach().cpu().numpy(), output,
                            n_class=n_class)
            if (batch_idx + 1) % 25 == 0:
                acc, acc_cls, mIoU, fwavacc = label_accuracy_score(hist)
                print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}, mIoU:{:.4f}'.
                      format(epoch + 1, num_epochs, batch_idx + 1,
                             len(unlabeled_loader), unlabeled_loss.item(), mIoU))
            # Every 50 unlabeled batches, train one pass over the labeled data.
            if batch_idx % 50 == 0:
                # fixed: iterate the `data_loader` parameter (previously the
                # free global `train_loader` was used and the parameter was
                # ignored), and use a distinct loop variable so the outer
                # `batch_idx` is not clobbered by the inner loop
                for labeled_idx, (images, masks, _) in enumerate(data_loader):
                    images = torch.stack(images)        # (batch, channel, height, width)
                    masks = torch.stack(masks).long()
                    # move to the compute device
                    images, masks = images.to(device), masks.to(device)
                    output = model(images)
                    labeled_loss = criterion(output, masks)
                    optimizer.zero_grad()
                    labeled_loss.backward()
                    optimizer.step()
                # now increment step by 1 (per labeled pass)
                step += 1
        if (epoch + 1) % val_every == 0:
            avrg_loss, val_mIoU = validation(epoch + 1, model, val_loader,
                                             criterion, device, n_class)
            if val_mIoU > best_mIoU:
                print('Best performance at epoch: {}'.format(epoch + 1))
                print('Save model in', saved_dir)
                best_mIoU = val_mIoU
                save_model(model, saved_dir, file_name)
            wandb.log({
                "val_loss": avrg_loss,
                "val_mIoU": val_mIoU,
                "best_mIoU": best_mIoU
            })
            model.train()
def validate(self):
    """Validate an RGB-D segmentation model, log metrics to CSV and
    tensorboard, checkpoint, and track the best mIoU / precision models.

    Side effects: writes 'checkpoint.pth.tar' every call, copying it to
    'model_best.pth.tar' / 'model_prec_best.pth.tar' on improvement; steps
    the (precision-driven) LR scheduler; restores train mode on exit.
    """
    n_class = self.train_loader.dataset.n_class
    # os.system('play -nq -t alsa synth {} sine {}'.format(0.3, 440)) # sound an alarm
    val_loss = 0
    prec = 0
    # one row of (acc, acc_cls, mean_iu, fwavacc) per batch
    metrics = np.zeros((len(self.val_loader), 4), dtype=np.float64)
    for batch_idx, (rgb_img, ddd_img, target) in tqdm.tqdm(
            enumerate(self.val_loader), total=len(self.val_loader),
            desc='  val %d' % self.epoch, ncols=80, leave=False):
        ## validate
        with torch.no_grad():
            self.model.eval()
            if self.cuda:
                rgb_img = rgb_img.cuda()
                ddd_img = ddd_img.cuda()
                target = target.cuda()
            output = self.model(rgb_img, ddd_img)
            # upsample logits to the label resolution; encoded labels carry
            # an extra channel dim, hence the different size slice
            if self.val_loader.dataset.encode_label:
                output = F.interpolate(output, size=target.size()[2:],
                                       mode='bilinear', align_corners=False)
            else:
                output = F.interpolate(output, size=target.size()[1:],
                                       mode='bilinear', align_corners=False)
            loss = self.criterion(output, target)
            loss_data = loss.data.item()
            if np.isnan(loss_data):
                raise ValueError('loss is nan while validating')
            val_loss += loss_data / len(rgb_img)
        ## some stats
        lbl_pred = output.data.max(1)[1].cpu().numpy().squeeze()
        lbl_true = target.data.cpu().numpy().squeeze()
        prec += compute_precision(lbl_pred, lbl_true)
        m = label_accuracy_score(lbl_true, lbl_pred, n_class)
        metrics[batch_idx, :] = np.array(m)
    metrics = np.mean(metrics, axis=0)
    val_prec = prec / len(self.val_loader)
    with open(osp.join(self.output_path, 'log.csv'), 'a') as f:
        metrics_str = ['%.10f' % (a) for a in list(metrics)]
        elapsed_time = (
            datetime.datetime.now(pytz.timezone('Asia/Jakarta')) -
            self.timestamp_start).total_seconds()
        val_loss /= len(self.val_loader)
        # 5 empty train columns first, then the validation columns
        log = [self.epoch, self.iteration] + [''] * 5 + \
            ['%.10f' % (val_loss)] + metrics_str + [elapsed_time]
        log = map(str, log)
        f.write(','.join(log) + '\n')
    mean_iu = metrics[2]
    is_best = mean_iu > self.best_mean_iu
    if is_best:
        self.best_mean_iu = mean_iu
    is_prec_best = val_prec > self.best_prec
    if is_prec_best:
        self.best_prec = val_prec
    # always write the rolling checkpoint
    torch.save({
        'epoch': self.epoch,
        'iteration': self.iteration,
        'arch': self.arch,
        'optim_state_dict': self.optim.state_dict(),
        'model_state_dict': self.model.state_dict(),
        'best_mean_iu': self.best_mean_iu,
        'best_prec': self.best_prec,
    }, osp.join(self.output_path, 'checkpoint.pth.tar'))
    # rfnet has a second (decoder) optimizer — overwrite with a checkpoint
    # that also carries its state
    if self.arch == 'rfnet':
        torch.save({
            'epoch': self.epoch,
            'iteration': self.iteration,
            'arch': self.arch,
            'optim_state_dict': self.optim.state_dict(),
            'optim_dec_state_dict': self.optim_dec.state_dict(),
            'model_state_dict': self.model.state_dict(),
            'best_mean_iu': self.best_mean_iu,
            'best_prec': self.best_prec,
        }, osp.join(self.output_path, 'checkpoint.pth.tar'))
    if is_best:
        shutil.copy(osp.join(self.output_path, 'checkpoint.pth.tar'),
                    osp.join(self.output_path, 'model_best.pth.tar'))
    if is_prec_best:
        shutil.copy(osp.join(self.output_path, 'checkpoint.pth.tar'),
                    osp.join(self.output_path, 'model_prec_best.pth.tar'))
    self.writer.add_scalar('val/loss', val_loss, self.epoch)
    self.writer.add_scalar('val/precision', val_prec, self.epoch)
    self.writer.add_scalar('val/accuracy', metrics[0], self.epoch)
    self.writer.add_scalar('val/acc_class', metrics[1], self.epoch)
    self.writer.add_scalar('val/mean_iu', metrics[2], self.epoch)
    self.writer.add_scalar('val/fwacc', metrics[3], self.epoch)
    # scheduler is driven by precision, not loss
    if self.scheduler != None:
        self.scheduler.step(val_prec)
    if self.training:
        self.model.train()
def train_epoch(self):
    """Train ``self.model`` for one epoch over ``self.train_loader``.

    Handles the architecture-specific multi-head losses (bisenet / icnet
    auxiliary outputs), optional mixed precision via apex ``amp``, the
    extra decoder optimizer used by rfnet, per-iteration csv logging and
    end-of-epoch tensorboard summaries.
    """
    self.model.train()
    if self.freeze_bn:
        # keep batch-norm layers fixed (e.g. while fine-tuning)
        self.model.apply(BNtoFixed)
    n_class = self.train_loader.dataset.n_class
    m = []  # collects per-batch (acc, acc_cls, mean_iu, fwavacc)
    for batch_idx, (rgb_img, ddd_img, target) in tqdm.tqdm(
            enumerate(self.train_loader),
            total=len(self.train_loader),
            desc=' epoch %d' % self.epoch,
            ncols=80,
            leave=False):
        # global iteration counter derived from epoch and batch index
        iteration = batch_idx + self.epoch * len(self.train_loader)
        self.iteration = iteration
        ## prepare input and label
        if self.cuda:
            rgb_img = rgb_img.cuda()
            ddd_img = ddd_img.cuda()
            target = target.cuda()
        ## main training function
        ## compute output of feed forward
        output = self.model(rgb_img, ddd_img)
        # upsample every prediction head to the label resolution
        if self.arch.startswith('bisenet'):
            out_sup1 = F.interpolate(output[1], size=target.size()[1:],
                                     mode='bilinear')
            out_sup2 = F.interpolate(output[2], size=target.size()[1:],
                                     mode='bilinear')
            output = F.interpolate(output[0], size=target.size()[1:],
                                   mode='bilinear')
        elif self.arch.startswith('icnet'):
            out_sub24 = F.interpolate(output[1], size=target.size()[1:],
                                      mode='bilinear')
            out_sub4 = F.interpolate(output[2], size=target.size()[1:],
                                     mode='bilinear')
            output = F.interpolate(output[0], size=target.size()[1:],
                                   mode='bilinear')
        elif self.train_loader.dataset.encode_label:
            # encoded labels carry an extra leading dim, hence size()[2:]
            output = F.interpolate(output, size=target.size()[2:],
                                   mode='bilinear')
            target = target.float()
        ## compute loss and backpropagate
        loss = None
        if self.arch.startswith('bisenet'):
            # principal loss plus alpha-weighted auxiliary supervision
            loss_p = self.criterion(output, target)
            loss_a1 = self.crit_aux1(out_sup1, target)
            loss_a2 = self.crit_aux2(out_sup2, target)
            loss = loss_p + self.alphas[0] * loss_a1 + self.alphas[1] * loss_a2
        elif self.arch.startswith('icnet'):
            # lambda-weighted cascade losses over the three resolutions
            loss_sub124 = self.criterion(output, target)
            loss_sub24 = self.crit_sub24(out_sub24, target)
            loss_sub4 = self.crit_sub4(out_sub4, target)
            loss = self.lambdas[0] * loss_sub4 + \
                self.lambdas[1] * loss_sub24 + \
                self.lambdas[2] * loss_sub124
        else:
            loss = self.criterion(output, target)
        loss_data = loss.data.item()
        self.optim.zero_grad()
        if self.arch.startswith('rfnet'):
            # rfnet's decoder has its own optimizer
            self.optim_dec.zero_grad()
        if self.use_amp:
            # apex mixed-precision loss scaling
            with amp.scale_loss(loss, self.optim) as scaled_loss:
                scaled_loss.backward()
        else:
            loss.backward()
        if self.arch.startswith('bisenet') or 'effnet' in self.arch:
            # clip gradients for the architectures that need it
            nn.utils.clip_grad_norm_(self.model.parameters(), 0.25)
        self.optim.step()
        if self.arch.startswith('rfnet'):
            self.optim_dec.step()
        ## the stats
        lbl_pred = output.data.max(1)[1].cpu().numpy()[:, :, :]
        lbl_true = target.data.cpu().numpy()
        metrics = label_accuracy_score(lbl_true, lbl_pred, n_class=n_class)
        with open(osp.join(self.output_path, 'log.csv'), 'a') as f:
            loss_data_str = '%.10f' % (loss_data)
            metrics_str = ['%.10f' % (a) for a in list(metrics)]
            elapsed_time = (
                datetime.datetime.now(pytz.timezone('Asia/Jakarta')) -
                self.timestamp_start).total_seconds()
            # the five validation columns are left blank in this csv layout
            log = [self.epoch, self.iteration] + [loss_data_str] + \
                metrics_str + [''] * 5 + [elapsed_time]
            log = map(str, log)
            f.write(','.join(log) + '\n')
        m.append(metrics)
        if self.max_iter != None and self.iteration >= self.max_iter:
            break
    # epoch-mean metrics; NOTE(review): fails on an empty loader, and the
    # tensorboard 'train/loss' below is the LAST batch's loss, not a mean.
    m = np.mean(np.array(m), axis=0)
    self.writer.add_scalar('train/loss', loss_data, self.epoch)
    self.writer.add_scalar('train/accuracy', m[0], self.epoch)
    self.writer.add_scalar('train/acc_class', m[1], self.epoch)
    self.writer.add_scalar('train/mean_iu', m[2], self.epoch)
    self.writer.add_scalar('train/fwacc', m[3], self.epoch)
def test(test_loader, net, criterion, epoch, showall=False):
    """Evaluate the multi-head network on the test set.

    Each of the three output heads ('cnn0'..'cnn2') is scored against its
    own combined label map; per-head running means are kept in
    AverageMeters. Returns ``(cnn2_accs.avg, cnn2_mIoUs.avg)`` for the
    finest head. When ``showall`` is set, ground-truth / input / predicted
    images are also written under ``runs/``.

    NOTE(review): uses legacy PyTorch idioms -- ``Variable(volatile=True)``
    and ``.data[0]`` -- that only work on PyTorch <= 0.3.x. Reading the
    meters through ``locals()['cnn%d_...']`` relies on CPython returning
    the live local objects in the snapshot dict; fragile but functional
    for reads/mutation.
    """
    cnn0_loss, cnn0_accs, cnn0_mIoUs, cnn0_acc_clss, cnn0_fscore = AverageMeter(
    ), AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter()
    cnn1_loss, cnn1_accs, cnn1_mIoUs, cnn1_acc_clss, cnn1_fscore = AverageMeter(
    ), AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter()
    cnn2_loss, cnn2_accs, cnn2_mIoUs, cnn2_acc_clss, cnn2_fscore = AverageMeter(
    ), AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter()
    # switch to evaluation mode
    net.eval()
    start_time = time.time()
    for batch_idx, (datas, targets) in enumerate(test_loader):
        if args.cuda:
            datas = datas.cuda()
        datas = Variable(datas, volatile=True)
        # compute output
        scores = net(datas)
        # build one target map per head from the label-combination dicts
        multi_targets = combine_label(targets, COMB_DICTs)
        multi_targets_tensor = torch.from_numpy(multi_targets).long()
        if args.cuda:
            multi_targets_tensor = multi_targets_tensor.cuda()
        testlosses = []
        for i, score in enumerate(scores):
            targets_i = Variable(multi_targets_tensor[i, :, :, :])
            testlosses.append(criterion(score, targets_i))
        testloss = sum(testlosses)
        # measure accuracy and record loss
        preds = []
        for score in scores:
            p = score.data.max(1)[1]
            preds.append(p)
        for i, lbl_pred in enumerate(preds):
            lbl_pred = lbl_pred.cpu().numpy()[:, :, :]  # (n_batch, h, w)
            lbl_true = multi_targets[i, :, :, :]
            acc, acc_cls, mIoU, fscore = label_accuracy_score(
                lbl_true, lbl_pred, n_class=NUM_CLASSES[i])
            # update the i-th head's meters (see locals() note above)
            locals()['cnn%d_loss' % (i)].update(testlosses[i].data[0],
                                                datas.size(0))
            locals()['cnn%d_accs' % (i)].update(acc, datas.size(0))
            locals()['cnn%d_acc_clss' % (i)].update(acc_cls, datas.size(0))
            locals()['cnn%d_mIoUs' % (i)].update(mIoU, datas.size(0))
            locals()['cnn%d_fscore' % (i)].update(fscore, datas.size(0))
        if showall:
            # dump ground truth, de-normalized input and head-2 prediction
            # images for every sample of this batch
            trues = decode_labels(targets, num_images=len(targets))
            for i, t in enumerate(trues):
                Image.fromarray(t).save(
                    'runs/{}_{}/results/{}_{}_gt.png'.format(
                        args.name, args.dataset, batch_idx, i))
                Image.fromarray(
                    (unNormalize(datas.data).transpose(1, 2).transpose(
                        2, 3).cpu().numpy()[i] * 255).astype(np.uint8)).save(
                            'runs/{}_{}/results/{}_{}_img.png'.format(
                                args.name, args.dataset, batch_idx, i))
            lbl_pred = preds[2]
            pred = decode_labels(lbl_pred, num_images=len(lbl_pred))
            for i, p in enumerate(pred):
                Image.fromarray(p).save(
                    'runs/{}_{}/results/{}_{}_pred.png'.format(
                        args.name, args.dataset, batch_idx, i))
        if showall and args.visdom:
            # NOTE(review): plots the current batch every iteration in
            # showall mode -- confirm this per-batch plotting is intended
            plot_images(datas, [p.cpu().numpy() for p in preds],
                        multi_targets,
                        epoch,
                        split='test',
                        crop_size=map(int, args.input_size.split(',')))
    duration = time.time() - start_time
    print(
        '\nTest set: Loss: {:.4f}, Acc: {:.2f}%, mIoU: {:.4f}, Acc_cls: {:.2f}%, f-score: {:.2f}% ({:.3f} sec)\n'
        .format(cnn2_loss.avg, 100. * cnn2_accs.avg, cnn2_mIoUs.avg,
                100 * cnn2_acc_clss.avg, 100 * cnn2_fscore.avg, duration))
    if args.visdom:
        # push per-head averages to the visdom plotter
        for i in range(3):
            plotter.plot('cnn%d_acc' % (i), 'test', epoch,
                         locals()['cnn%d_accs' % (i)].avg,
                         exp_name=args.name + '_' + args.dataset)
            plotter.plot('cnn%d_loss' % (i), 'test', epoch,
                         locals()['cnn%d_loss' % (i)].avg,
                         exp_name=args.name + '_' + args.dataset)
            plotter.plot('cnn%d_mIoU' % (i), 'test', epoch,
                         locals()['cnn%d_mIoUs' % (i)].avg,
                         exp_name=args.name + '_' + args.dataset)
            plotter.plot('cnn%d_acc_cls' % (i), 'test', epoch,
                         locals()['cnn%d_acc_clss' % (i)].avg,
                         exp_name=args.name + '_' + args.dataset)
            plotter.plot('cnn%d_fscore' % (i), 'test', epoch,
                         locals()['cnn%d_fscore' % (i)].avg,
                         exp_name=args.name + '_' + args.dataset)
        # plot images in a grid
        if epoch == 1 or epoch % 10 == 0:
            plot_images(datas, [p.cpu().numpy() for p in preds],
                        multi_targets,
                        epoch,
                        split='test',
                        crop_size=map(int, args.input_size.split(',')))
    return cnn2_accs.avg, cnn2_mIoUs.avg
def validate(self):
    """Validate the model, save a tiled visualization and checkpoint.

    Accumulates per-image predictions over ``self.val_loader``, scores
    them with ``utils.label_accuracy_score``, writes up to nine tiled
    segmentation visualizations to disk, appends one row to ``log.csv``
    and saves a checkpoint (copied to ``model_best.pth.tar`` when the
    mean IU improves). Restores the model's previous train/eval mode on
    exit.
    """
    training = self.model.training  # remember mode so we can restore it
    self.model.eval()
    n_class = len(self.val_loader.dataset.class_names)
    val_loss = 0
    visualizations = []
    label_trues, label_preds = [], []
    with torch.no_grad():
        for batch_idx, (data, target) in tqdm.tqdm(
                enumerate(self.val_loader),
                total=len(self.val_loader),
                desc='Valid iteration=%d' % self.iteration,
                ncols=80,
                leave=False):
            if self.cuda:
                data, target = data.to('cuda'), target.to('cuda')
            score = self.model(data)
            loss = cross_entropy2d(score, target,
                                   size_average=self.size_average)
            if np.isnan(float(loss.item())):
                raise ValueError('loss is nan while validating')
            # normalize the batch loss by the batch size
            val_loss += float(loss.item()) / len(data)
            imgs = data.data.cpu()
            lbl_pred = score.data.max(1)[1].cpu().numpy()[:, :, :]
            lbl_true = target.data.cpu()
            for img, lt, lp in zip(imgs, lbl_true, lbl_pred):
                # undo dataset normalization for visualization/scoring
                img, lt = self.val_loader.dataset.untransform(img, lt)
                label_trues.append(lt)
                label_preds.append(lp)
                if len(visualizations) < 9:
                    viz = fcn.utils.visualize_segmentation(
                        lbl_pred=lp, lbl_true=lt, img=img, n_class=n_class)
                    visualizations.append(viz)
    metrics = utils.label_accuracy_score(label_trues, label_preds, n_class)
    out = osp.join(self.out, 'visualization_viz')
    if not osp.exists(out):
        os.makedirs(out)
    out_file = osp.join(out, 'iter%012d.jpg' % self.iteration)
    img_ = fcn.utils.get_tile_image(visualizations)
    #scipy.misc.imsave(out_file, img_)
    imageio.imwrite(out_file, img_)
    # also save a matplotlib copy of the tiled visualization
    plt.imshow(imageio.imread(out_file))
    plt.savefig('imagesProduced/validate')
    #plt.show()
    val_loss /= len(self.val_loader)
    with open(osp.join(self.out, 'log.csv'), 'a') as f:
        elapsed_time = (
            datetime.datetime.now(pytz.timezone('America/Bogota')) -
            self.timestamp_start).total_seconds()
        # the five training columns are left blank in this csv layout
        log = [self.epoch, self.iteration] + [''] * 5 + [val_loss] + \
            list(metrics) + [elapsed_time]
        log = map(str, log)
        f.write(','.join(log) + '\n')
    mean_iu = metrics[2]
    is_best = mean_iu > self.best_mean_iu
    if is_best:
        self.best_mean_iu = mean_iu
    torch.save({
        'epoch': self.epoch,
        'iteration': self.iteration,
        'arch': self.model.__class__.__name__,
        'optim_state_dict': self.optim.state_dict(),
        'model_state_dict': self.model.state_dict(),
        'best_mean_iu': self.best_mean_iu,
    }, osp.join(self.out, 'checkpoint.pth.tar'))
    if is_best:
        shutil.copy(osp.join(self.out, 'checkpoint.pth.tar'),
                    osp.join(self.out, 'model_best.pth.tar'))
    if training:
        # caller was mid-training: put the model back in train mode
        self.model.train()
def validate(self):
    """Validate the model and checkpoint on mean-IU improvement.

    Runs ``self.val_loader`` under ``no_grad``, accumulates per-image
    predictions, scores them with ``utils.label_accuracy_score``, saves a
    tiled visualization of up to nine samples, prints and logs the
    metrics to ``log.csv``, and saves a checkpoint (copied to
    ``model_best.pth.tar`` on a new best mean IU). Restores the model's
    previous train/eval mode on exit.
    """
    # import matplotlib.pyplot as plt
    training = self.model.training  # remember mode so we can restore it
    self.model.eval()
    n_class = len(self.val_loader.dataset.class_names)
    val_loss = 0
    visualizations = []
    label_trues, label_preds = [], []
    with torch.no_grad():
        for batch_idx, (data, target) in tqdm.tqdm(
                enumerate(self.val_loader),
                total=len(self.val_loader),
                desc="Valid iteration=%d" % self.iteration,
                ncols=80,
                leave=False,
        ):
            # NOTE(review): self.cuda is used as a device argument here --
            # presumably a torch.device / device string, not a bool
            data, target = data.to(self.cuda), target.to(self.cuda)
            score = self.model(data)
            loss = self.cross_entropy2d(score, target)
            if np.isnan(float(loss.item())):
                raise ValueError("loss is nan while validating")
            # normalize the batch loss by the batch size
            val_loss += float(loss.item()) / len(data)
            imgs = data.data.cpu()
            lbl_pred = score.data.max(1)[1].cpu().numpy()[:, :, :]
            lbl_true = target.data.cpu()
            for img, lt, lp in zip(imgs, lbl_true, lbl_pred):
                # undo dataset normalization for visualization/scoring
                img, lt = self.val_loader.dataset.untransform(img, lt)
                label_trues.append(lt)
                label_preds.append(lp)
                if len(visualizations) < 9:
                    viz = fcn.utils.visualize_segmentation(
                        lbl_pred=lp, lbl_true=lt, img=img, n_class=n_class)
                    visualizations.append(viz)
    metrics = utils.label_accuracy_score(label_trues, label_preds, n_class)
    out = osp.join(self.out, "visualization_viz")
    if not osp.exists(out):
        os.makedirs(out)
    out_file = osp.join(out, "iter%012d.jpg" % self.iteration)
    img_ = fcn.utils.get_tile_image(visualizations)
    imageio.imwrite(out_file, img_)
    # plt.imshow(imageio.imread(out_file))
    # plt.show()
    val_loss /= len(self.val_loader)
    print(
        "acc {:0.3f}, acc_cls {:0.3f}, mean_iu {:0.3f}, fwavacc {:0.3f}, val loss {}"
        .format(metrics[0], metrics[1], metrics[2], metrics[3], val_loss))
    with open(osp.join(self.out, "log.csv"), "a") as f:
        elapsed_time = (datetime.datetime.now() -
                        self.timestamp_start).total_seconds()
        # the five training columns are left blank in this csv layout
        log = ([self.epoch, self.iteration] + [""] * 5 + [val_loss] +
               list(metrics) + [elapsed_time])
        log = map(str, log)
        f.write(",".join(log) + "\n")
    mean_iu = metrics[2]
    is_best = mean_iu > self.best_mean_iu
    if is_best:
        self.best_mean_iu = mean_iu
    torch.save(
        {
            "epoch": self.epoch,
            "iteration": self.iteration,
            "arch": self.model.__class__.__name__,
            "optim_state_dict": self.optim.state_dict(),
            "model_state_dict": self.model.state_dict(),
            "best_mean_iu": self.best_mean_iu,
        },
        osp.join(self.out, "checkpoint.pth.tar"),
    )
    if is_best:
        shutil.copy(
            osp.join(self.out, "checkpoint.pth.tar"),
            osp.join(self.out, "model_best.pth.tar"),
        )
    if training:
        # caller was mid-training: put the model back in train mode
        self.model.train()
def validate(self, both_fcn_and_seenmask=False):
    """Validate the (zero-shot) segmentation model for the current epoch.

    Runs the forward pass over ``self.val_loader`` (via ``forward_szn``
    when ``both_fcn_and_seenmask`` is set, otherwise ``forward``), builds
    up to 25 segmentation visualizations, computes overall -- and, when
    ``self.unseen`` is set, seen/unseen-split -- metrics, logs them to
    csv and tensorboard, and checkpoints the weights (copying to 'best'
    on a new best mean IU).

    NOTE(review): ``loss.data[0]`` / ``score.sum().data[0]`` are legacy
    PyTorch <= 0.3 idioms; the per-batch ``data[0]`` unbatching assumes a
    validation batch size of 1.

    Args:
        both_fcn_and_seenmask: use the combined fcn+seenmask forward pass
            and write visualizations under 'szn_viz' instead of 'fcn_viz'.
    """
    self.model.eval()
    val_loss = 0
    lbl_trues, lbl_preds, visualizations = [], [], []
    for batch_idx, (data, target) in enumerate(self.val_loader):
        if both_fcn_and_seenmask:
            score, loss, lbl_pred, lbl_true = self.forward_szn(data, target)
        else:
            score, loss, lbl_pred, lbl_true = self.forward(data, target)
        val_loss += float(loss.data[0])
        print(
            "Test Epoch {:<5} | Iteration {:<5} | Loss {:5.5f} | Score Sum {:10.5f}"
            .format(int(self.epoch), int(batch_idx), float(loss.data[0]),
                    float(score.sum().data[0])))
        img, lt, lp = data[0], lbl_true[0], lbl_pred[
            0]  # eliminate first dimension (n=1) for visualization
        img, lt = self.val_loader.dataset.untransform(img, lt)
        lbl_trues.append(lt)
        lbl_preds.append(lp)
        # generate visualization for first few images of val_loader
        if len(visualizations) < 25:
            viz = vis_utils.visualize_segmentation(
                lbl_pred=lp,
                lbl_true=lt,
                img=img,
                n_class=self.n_class,
                label_names=self.label_names,
                unseen=self.val_unseen)
            visualizations.append(viz)
    # save the visualizaton image
    if both_fcn_and_seenmask:
        out = osp.join(self.log_dir, 'szn_viz')
    else:
        out = osp.join(self.log_dir, 'fcn_viz')
    if not osp.exists(out):
        os.makedirs(out)
    out_file = osp.join(out, 'epoch%d.jpg' % self.epoch)
    viz_img = fcn.utils.get_tile_image(visualizations)
    scipy.misc.imsave(out_file, viz_img)
    # update the validation log for the current epoch
    if self.unseen:
        # label_accuracy_score returns (overall, seen, unseen) metric
        # tuples when given the unseen-class list
        metrics = utils.label_accuracy_score(lbl_trues,
                                             lbl_preds,
                                             self.n_class,
                                             unseen=self.val_unseen)
        metrics, seen_metrics, unseen_metrics = metrics
        self.tb_writer.add_scalar('fcn/val/seen/pxl_acc', seen_metrics[0],
                                  self.epoch)
        self.tb_writer.add_scalar('fcn/val/seen/class_acc', seen_metrics[1],
                                  self.epoch)
        self.tb_writer.add_scalar('fcn/val/seen/mean_iu', seen_metrics[2],
                                  self.epoch)
        self.tb_writer.add_scalar('fcn/val/seen/fwavacc', seen_metrics[3],
                                  self.epoch)
        self.tb_writer.add_scalar('fcn/val/unseen/pxl_acc',
                                  unseen_metrics[0], self.epoch)
        self.tb_writer.add_scalar('fcn/val/unseen/class_acc',
                                  unseen_metrics[1], self.epoch)
        self.tb_writer.add_scalar('fcn/val/unseen/mean_iu',
                                  unseen_metrics[2], self.epoch)
        self.tb_writer.add_scalar('fcn/val/unseen/fwavacc',
                                  unseen_metrics[3], self.epoch)
        print('seen pxl_acc: %.3f' % seen_metrics[0])
        print('seen class_acc: %.3f' % seen_metrics[1])
        print('seen mean_iu: %.3f' % seen_metrics[2])
        print('seen fwavacc: %.3f' % seen_metrics[3])
        print('unseen pxl_acc: %.3f' % unseen_metrics[0])
        print('unseen class_acc: %.3f' % unseen_metrics[1])
        print('unseen mean_iu: %.3f' % unseen_metrics[2])
        print('unseen fwavacc: %.3f' % unseen_metrics[3])
    else:
        metrics = utils.label_accuracy_score(lbl_trues, lbl_preds,
                                             self.n_class)
    val_loss /= len(
        self.val_loader)  # val loss is averaged across all the images
    with open(osp.join(self.log_dir, 'val_log.csv'), 'a') as f:
        elapsed_time = datetime.datetime.now(
            pytz.timezone('US/Eastern')) - self.timestamp_start
        if self.unseen:
            log = [self.epoch, self.iteration] + [val_loss] + list(
                metrics) + list(seen_metrics) + list(unseen_metrics) + [
                    elapsed_time
                ]
        else:
            log = [self.epoch, self.iteration
                   ] + [val_loss] + list(metrics) + [elapsed_time]
        log = map(str, log)
        f.write(','.join(log) + '\n')
    # write metrics to tensorboard
    self.tb_writer.add_scalar('fcn/val/loss', val_loss, self.epoch)
    self.tb_writer.add_scalar('fcn/val/pxl_acc', metrics[0], self.epoch)
    self.tb_writer.add_scalar('fcn/val/class_acc', metrics[1], self.epoch)
    self.tb_writer.add_scalar('fcn/val/mean_iu', metrics[2], self.epoch)
    self.tb_writer.add_scalar('fcn/val/fwavacc', metrics[3], self.epoch)
    self.tb_writer.add_image('fcn/segmentations', viz_img, self.epoch)
    print('overall pxl_acc: %.3f' % metrics[0])
    print('overall class_acc: %.3f' % metrics[1])
    print('overall mean_iu: %.3f' % metrics[2])
    print('overall fwavacc: %.3f' % metrics[3])
    # track and update the best mean intersection over union
    mean_iu = metrics[2]
    is_best = mean_iu > self.best_mean_iu
    if is_best:
        self.best_mean_iu = mean_iu
    # checkpoint the model's weights
    torch.save(
        {
            'epoch': self.epoch,
            'iteration': self.iteration,
            'arch': self.model.__class__.__name__,
            'optim_state_dict': self.optim.state_dict(),
            'model_state_dict': self.model.state_dict(),
            'best_mean_iu': self.best_mean_iu,
        }, osp.join(self.log_dir, 'checkpoint'))
    # save the weights for the best performing model so far
    if is_best:
        shutil.copy(osp.join(self.log_dir, 'checkpoint'),
                    osp.join(self.log_dir, 'best'))
def train(train_loader, net, criterion, optimizer, epoch):
    """Train the multi-head network for one epoch.

    Each of the three output heads ('cnn0'..'cnn2') is supervised with
    its own combined label map; the total loss is the sum of the per-head
    criterion losses. Per-head running means are kept in AverageMeters,
    progress is printed every ``args.log_interval`` batches (head-2
    stats), and averages are optionally pushed to visdom.

    NOTE(review): uses legacy PyTorch idioms (``Variable``, ``.data[0]``)
    that only work on PyTorch <= 0.3.x. Reading the meters through
    ``locals()['cnn%d_...']`` relies on CPython returning the live local
    objects in the snapshot dict; fragile but functional for
    reads/mutation. ``map(int, ...)`` for ``crop_size`` yields a lazy
    iterator on Python 3 -- confirm ``plot_images`` tolerates that.
    """
    cnn0_loss, cnn0_accs, cnn0_mIoUs, cnn0_acc_clss, cnn0_fscore = AverageMeter(
    ), AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter()
    cnn1_loss, cnn1_accs, cnn1_mIoUs, cnn1_acc_clss, cnn1_fscore = AverageMeter(
    ), AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter()
    cnn2_loss, cnn2_accs, cnn2_mIoUs, cnn2_acc_clss, cnn2_fscore = AverageMeter(
    ), AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter()
    # switch to train mode
    net.train()
    start_time = time.time()
    all_start = start_time
    print(' Train Epoch | Loss | Acc | mIoU |'
          ' Acc_cls | f-score | Time ')
    for batch_idx, (datas, targets) in enumerate(train_loader):
        if args.cuda:
            datas = datas.cuda()
        datas = Variable(datas)
        # compute output
        scores = net(datas)
        # build one target map per head from the label-combination dicts
        multi_targets = combine_label(targets, COMB_DICTs)
        multi_targets_tensor = torch.from_numpy(multi_targets).long()
        if args.cuda:
            multi_targets_tensor = multi_targets_tensor.cuda()
        losses = []
        for i, score in enumerate(scores):
            targets_i = Variable(multi_targets_tensor[i, :, :, :])
            losses.append(criterion(score, targets_i))
        loss = sum(losses)
        # compute gradient and do optimizer step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # measure accuracy and record loss
        preds = []
        for score in scores:
            p = score.data.max(1)[1]
            preds.append(p)
        for i, lbl_pred in enumerate(preds):
            lbl_pred = lbl_pred.cpu().numpy()[:, :, :]  # (n_batch, h, w)
            lbl_true = multi_targets[i, :, :, :]
            acc, acc_cls, mIoU, fscore = label_accuracy_score(
                lbl_true, lbl_pred, n_class=NUM_CLASSES[i])
            # update the i-th head's meters (see locals() note above)
            locals()['cnn%d_loss' % (i)].update(losses[i].data[0],
                                                datas.size(0))
            locals()['cnn%d_accs' % (i)].update(acc, datas.size(0))
            locals()['cnn%d_acc_clss' % (i)].update(acc_cls, datas.size(0))
            locals()['cnn%d_mIoUs' % (i)].update(mIoU, datas.size(0))
            locals()['cnn%d_fscore' % (i)].update(fscore, datas.size(0))
        if batch_idx % args.log_interval == 0:
            duration = time.time() - start_time
            print(
                '{:3d}[{:4d}/{:4d}] | {:.3f}({:.3f}) | {:2.2f}%({:2.2f}%) | {:.2f}({:.2f}) | {:2.2f}%({:2.2f}%) |'
                ' {:2.2f}%({:2.2f}%) | ({:.3f} sec)'.format(
                    epoch, batch_idx * len(datas),
                    len(train_loader.dataset), cnn2_loss.val, cnn2_loss.avg,
                    100. * cnn2_accs.val, 100. * cnn2_accs.avg,
                    cnn2_mIoUs.val, cnn2_mIoUs.avg,
                    100. * cnn2_acc_clss.val, 100. * cnn2_acc_clss.avg,
                    100. * cnn2_fscore.val, 100. * cnn2_fscore.avg,
                    duration))
            start_time = time.time()
    duration = time.time() - all_start
    print(
        'Train Summary: Epoch {}, Acc: {:.2f}%, mIoU: {:.2f}, Acc_cls: {:.2f}%, f-score: {:.2f}% ({:.3f} sec)'
        .format(epoch, 100. * cnn2_accs.avg, cnn2_mIoUs.avg,
                100 * cnn2_acc_clss.avg, 100 * cnn2_fscore.avg, duration))
    # log avg values to visdom
    if args.visdom:
        for i in range(3):
            plotter.plot('cnn%d_acc' % (i), 'train', epoch,
                         locals()['cnn%d_accs' % (i)].avg,
                         exp_name=args.name + '_' + args.dataset)
            plotter.plot('cnn%d_loss' % (i), 'train', epoch,
                         locals()['cnn%d_loss' % (i)].avg,
                         exp_name=args.name + '_' + args.dataset)
            plotter.plot('cnn%d_mIoU' % (i), 'train', epoch,
                         locals()['cnn%d_mIoUs' % (i)].avg,
                         exp_name=args.name + '_' + args.dataset)
            plotter.plot('cnn%d_acc_cls' % (i), 'train', epoch,
                         locals()['cnn%d_acc_clss' % (i)].avg,
                         exp_name=args.name + '_' + args.dataset)
            plotter.plot('cnn%d_fscore' % (i), 'train', epoch,
                         locals()['cnn%d_fscore' % (i)].avg,
                         exp_name=args.name + '_' + args.dataset)
        # plot images in a grid
        if epoch == 1 or epoch % 10 == 0:
            plot_images(datas, [p.cpu().numpy() for p in preds],
                        multi_targets,
                        epoch,
                        split='train',
                        crop_size=map(int, args.input_size.split(',')))
def validate(self):
    """Validate the seenmask model for the current epoch.

    Runs ``self.forward`` over ``self.val_loader``, builds up to 25
    seenmask visualizations, scores the predictions with
    ``utils.label_accuracy_score``, logs loss/metrics to csv and
    tensorboard, and saves the weights into ``self.checkpoint`` when the
    mean IU improves.

    NOTE(review): ``loss.data[0]`` / ``score.sum().data[0]`` are legacy
    PyTorch <= 0.3 idioms; the per-batch ``data[0]`` unbatching assumes a
    validation batch size of 1.
    """
    self.model.eval()
    val_loss = 0
    lbl_trues, lbl_preds, visualizations = [], [], []
    for batch_idx, (data, target) in enumerate(self.val_loader):
        score, loss, lbl_pred, lbl_true = self.forward(data, target)
        val_loss += float(loss.data[0])
        print(
            "Seenmask Test Epoch {:<5} | Iteration {:<5} | Loss {:5.5f} | Score Sum {:10.5f}"
            .format(int(self.epoch), int(batch_idx), float(loss.data[0]),
                    float(score.sum().data[0])))
        img, lt, lp = data[0], lbl_true[0], lbl_pred[
            0]  # eliminate first dimension (n=1) for visualization
        img, lt = self.val_loader.dataset.untransform(img, lt)
        lbl_trues.append(lt)
        lbl_preds.append(lp)
        # generate visualization for first few images of val_loader
        if len(visualizations) < 25:
            viz = vis_utils.visualize_seenmask(lbl_pred=lp,
                                               lbl_true=lt,
                                               img=img,
                                               n_class=self.n_class,
                                               unseen=self.unseen)
            visualizations.append(viz)
    # save the visualizaton image
    out = osp.join(self.log_dir, 'seenmask_viz')
    if not osp.exists(out):
        os.makedirs(out)
    out_file = osp.join(out, 'epoch%d.jpg' % self.epoch)
    viz_img = fcn.utils.get_tile_image(visualizations)
    scipy.misc.imsave(out_file, viz_img)
    metrics = utils.label_accuracy_score(lbl_trues, lbl_preds,
                                         self.n_class)
    val_loss /= len(
        self.val_loader)  # val loss is averaged across all the images
    with open(osp.join(self.log_dir, 'seenmask_val_log.csv'), 'a') as f:
        elapsed_time = datetime.datetime.now(
            pytz.timezone('US/Eastern')) - self.timestamp_start
        log = [self.epoch, self.iteration] + [val_loss] + list(metrics) + [
            elapsed_time
        ]
        log = map(str, log)
        f.write(','.join(log) + '\n')
    # write metrics to tensorboard
    self.tb_writer.add_scalar('seenmask/val/loss', val_loss, self.epoch)
    self.tb_writer.add_scalar('seenmask/val/pxl_acc', metrics[0],
                              self.epoch)
    self.tb_writer.add_scalar('seenmask/val/class_acc', metrics[1],
                              self.epoch)
    self.tb_writer.add_scalar('seenmask/val/mean_iu', metrics[2],
                              self.epoch)
    self.tb_writer.add_scalar('seenmask/val/fwavacc', metrics[3],
                              self.epoch)
    self.tb_writer.add_image('fcn/segmentations', viz_img, self.epoch)
    print('pxl_acc: %.3f' % metrics[0])
    print('class_acc: %.3f' % metrics[1])
    print('mean_iu: %.3f' % metrics[2])
    print('fwavacc: %.3f' % metrics[3])
    # track and update the best mean intersection over union
    mean_iu = metrics[2]
    is_best = mean_iu > self.best_mean_iu
    if is_best:
        self.best_mean_iu = mean_iu
        # refresh the stored weights and persist the best checkpoint
        self.checkpoint['model_state_dict'] = self.model.state_dict()
        # TODO: verify
        torch.save(self.checkpoint, osp.join(self.log_dir, 'best'))
def pseudo_labeling(num_epochs, model, data_loader, val_loader,
                    unlabeled_loader, device, val_every, file_name):
    """Semi-supervised training loop driven by pseudo-labels.

    For every unlabeled batch, the model's own argmax predictions are used
    as targets and the resulting loss is scaled by ``alpha_weight(step)``;
    every 50 unlabeled batches, one full epoch of normal supervised
    training on ``data_loader`` is interleaved. Validation runs every
    ``val_every`` epochs (saving the best mean-IU weights), and from
    epoch 4 onward an SWA average of the weights is maintained.

    Args:
        num_epochs: number of pseudo-labeling epochs to run.
        model: segmentation network producing (N, 12, H, W) logits.
        data_loader: labeled loader yielding (images, masks, _).
        val_loader: validation loader passed to ``validation``.
        unlabeled_loader: loader yielding (imgs, image_infos).
        device: torch device for model inputs and targets.
        val_every: validate every this many epochs.
        file_name: name used by ``save_model`` for the best weights.
    """
    # Instead of using the current epoch we use a "step" variable to
    # calculate alpha_weight -- this helps the model converge faster.
    from torch.optim.swa_utils import AveragedModel, SWALR
    from segmentation_models_pytorch.losses import SoftCrossEntropyLoss, JaccardLoss
    from adamp import AdamP

    criterion = [
        SoftCrossEntropyLoss(smooth_factor=0.1),
        JaccardLoss('multiclass', classes=12)
    ]
    optimizer = AdamP(params=model.parameters(), lr=0.0001, weight_decay=1e-6)
    swa_scheduler = SWALR(optimizer, swa_lr=0.0001)
    swa_model = AveragedModel(model)
    optimizer = Lookahead(optimizer, la_alpha=0.5)

    step = 100
    best_mIoU = 0
    model.train()
    print('Start Pseudo-Labeling..')
    for epoch in range(num_epochs):
        hist = np.zeros((12, 12))  # 12x12 confusion matrix accumulator
        for batch_idx, (imgs, image_infos) in enumerate(unlabeled_loader):
            # --- forward pass on the unlabeled batch to get pseudo labels
            model.eval()
            outs = model(torch.stack(imgs).to(device))
            # NOTE(review): squeeze() breaks for batch size 1 (it drops the
            # batch dim before the dim=1 argmax) -- confirm the loader
            # never yields a final batch of one image.
            # argmax already yields an int64 tensor on `device`, so the
            # previous GPU->numpy->Tensor->long->GPU round-trip is dropped.
            oms = torch.argmax(outs.squeeze(), dim=1).detach()

            # --- train on the batch against its own pseudo labels
            model.train()
            imgs = torch.stack(imgs)
            imgs = imgs.to(device)
            output = model(imgs)
            loss = 0
            for each in criterion:
                loss += each(output, oms)
            # ramp the unlabeled loss in via the alpha schedule
            unlabeled_loss = alpha_weight(step) * loss

            optimizer.zero_grad()
            unlabeled_loss.backward()
            optimizer.step()

            output = torch.argmax(output.squeeze(),
                                  dim=1).detach().cpu().numpy()
            hist = add_hist(hist, oms.detach().cpu().numpy(), output,
                            n_class=12)
            if (batch_idx + 1) % 25 == 0:
                acc, acc_cls, mIoU, fwavacc = label_accuracy_score(hist)
                print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}, mIoU:{:.4f}'.
                      format(epoch + 1, num_epochs, batch_idx + 1,
                             len(unlabeled_loader), unlabeled_loss.item(),
                             mIoU))

            # For every 50 batches train one epoch on labeled data
            if batch_idx % 50 == 0:
                # Normal training procedure.
                # Bug fix: this inner loop previously reused the name
                # `batch_idx`, shadowing the outer unlabeled-loop counter.
                for labeled_idx, (images, masks, _) in enumerate(data_loader):
                    labeled_loss = 0
                    images = torch.stack(
                        images)  # (batch, channel, height, width)
                    masks = torch.stack(masks).long()
                    # move tensors to the compute device
                    images, masks = images.to(device), masks.to(device)
                    output = model(images)
                    for each in criterion:
                        labeled_loss += each(output, masks)
                    optimizer.zero_grad()
                    labeled_loss.backward()
                    optimizer.step()
                # Now we increment step by 1
                step += 1

        if (epoch + 1) % val_every == 0:
            avrg_loss, val_mIoU = validation(epoch + 1, model, val_loader,
                                             criterion, device)
            if val_mIoU > best_mIoU:
                print('Best performance at epoch: {}'.format(epoch + 1))
                print('Save model in', saved_dir)
                best_mIoU = val_mIoU
                save_model(model, file_name=file_name)
            model.train()
        if epoch > 3:
            # maintain the stochastic-weight-averaged copy of the model
            swa_model.update_parameters(model)
            swa_scheduler.step()