def _main():
    args = get_generate_config()
    setattr(args, 'PAD_index', constants.PAD_index)
    setattr(args, 'BOS_index', constants.BOS_index)
    setattr(args, 'EOS_index', constants.EOS_index)
    setattr(args, 'rank', 0)
    # exactly one of --file / --raw_file must be given
    assert (args.file is None) ^ (args.raw_file is None)
    os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(args.cuda_num)
    model_state_dict, model_config = load_model(args.model_path)
    for key, value in model_config.items():
        setattr(args, key, value)
    print(args)
    get_vocab_info(args)
    model = make_model(args, model_state_dict, 0, False)
    setattr(args, 'model', model)
    get_dataloader(args)
    output = generate(args)
    output = restore_rank(output)
    if not os.path.exists(args.output_path):
        os.makedirs(args.output_path)
    save_file = os.path.join(args.output_path, 'result.txt')
    save2file(output, save_file)
    if args.ref_file is not None:
        eval = Eval(reference_file=args.ref_file)
        eval(save_file)
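# A minimal sketch (hypothetical values) of the mutual-exclusion check above:
# the XOR assert passes only when exactly one input source is provided.
for file, raw_file in [('tok.txt', None), (None, 'raw.txt'), (None, None)]:
    print(file, raw_file, (file is None) ^ (raw_file is None))
# -> True, True, False: both-None (and both-set) configurations are rejected.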
class Tester():
    def __init__(self, args):
        self.args = args
        self.logger = logging.getLogger()
        self.logger.setLevel(logging.INFO)
        self.Eval = Eval(self.args.num_classes)
        self.model = DeepLab(output_stride=self.args.output_stride,
                             class_num=self.args.num_classes,
                             pretrained=self.args.imagenet_pretrained and self.args.pretrained_ckpt_file is None,
                             bn_momentum=self.args.bn_momentum,
                             freeze_bn=self.args.freeze_bn)
        if self.args.pretrained_ckpt_file:
            self.load_checkpoint(self.args.pretrained_ckpt_file)

    def load_checkpoint(self, filename):
        try:
            self.logger.info("Loading checkpoint '{}'".format(filename))
            checkpoint = torch.load(filename, map_location=torch.device('cpu'))
            # self.current_epoch = checkpoint['epoch']
            # self.current_iter = checkpoint['iteration']
            state_dict = checkpoint['state_dict']
            # strip the 'module.' prefix added by nn.DataParallel
            new_dict = {}
            for key in state_dict:
                new_dict[key[7:]] = state_dict[key]
            self.model.load_state_dict(new_dict)
            # self.optimizer.load_state_dict(checkpoint['optimizer'])
            # self.best_MIou = checkpoint['best_MIou']
        except OSError as e:
            self.logger.info("No checkpoint exists at '{}'. Skipping...".format(self.args.checkpoint_dir))
            self.logger.info("**First time to train**")

    def predict(self, input):
        self.logger.info('validating one epoch...')
        self.Eval.reset()
        with torch.no_grad():
            self.model.eval()
            pred = self.model(input)
        return pred
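# A minimal usage sketch (hypothetical args and input shape): run single-image
# inference with the Tester above and recover a per-pixel label map.
tester = Tester(args)                            # args as parsed by the repo's CLI
x = torch.randn(1, 3, 512, 512)                  # NCHW float tensor, assumed preprocessed
logits = tester.predict(x)                       # (1, num_classes, 512, 512)
mask = logits.argmax(dim=1).squeeze(0).numpy()   # (512, 512) class indices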
class Trainer():
    def __init__(self, args, config, cuda=None):
        self.args = args
        os.environ["CUDA_VISIBLE_DEVICES"] = self.args.gpu
        self.config = config
        self.cuda = cuda and torch.cuda.is_available()
        self.device = torch.device('cuda' if self.cuda else 'cpu')
        self.best_MIou = 0
        self.current_epoch = 0
        self.epoch_num = self.config.epoch_num
        self.current_iter = 0
        self.writer = SummaryWriter()

        # path definition
        self.val_list_filepath = os.path.join(args.data_root_path, 'VOC2012/ImageSets/Segmentation/val.txt')
        self.gt_filepath = os.path.join(args.data_root_path, 'VOC2012/SegmentationClass/')
        self.pre_filepath = os.path.join(args.data_root_path, 'VOC2012/JPEGImages/')

        # Metric definition
        self.Eval = Eval(self.config.num_classes)

        # loss definition
        if args.loss_weight:
            classes_weights_path = os.path.join(self.config.classes_weight,
                                                self.args.dataset + 'classes_weights_log.npy')
            print(classes_weights_path)
            if not os.path.isfile(classes_weights_path):
                logger.info('calculating class weights...')
                calculate_weigths_labels(self.config)
            class_weights = np.load(classes_weights_path)
            pprint.pprint(class_weights)
            weight = torch.from_numpy(class_weights.astype(np.float32))
            logger.info('loading class weights successfully!')
        else:
            weight = None
        self.loss = nn.CrossEntropyLoss(weight=weight, ignore_index=255)
        self.loss.to(self.device)

        # model
        self.model = DeepLab(output_stride=self.args.output_stride,
                             class_num=self.config.num_classes,
                             pretrained=self.args.imagenet_pretrained,
                             bn_momentum=self.args.bn_momentum,
                             freeze_bn=self.args.freeze_bn)
        self.model = nn.DataParallel(self.model, device_ids=range(4))
        patch_replication_callback(self.model)
        self.model.to(self.device)

        self.optimizer = torch.optim.SGD(
            params=[
                {"params": self.get_params(self.model.module, key="1x"), "lr": self.args.lr},
                {"params": self.get_params(self.model.module, key="10x"), "lr": 10 * self.args.lr},
            ],
            momentum=self.config.momentum,
            weight_decay=self.config.weight_decay,
        )
        # dataloader
        self.dataloader = VOCDataLoader(self.args, self.config)

    def main(self):
        # display config details
        logger.info("Global configuration as follows:")
        pprint.pprint(self.config)
        pprint.pprint(self.args)
        # choose cuda
        if self.cuda:
            current_device = torch.cuda.current_device()
            logger.info("This model will run on {}".format(torch.cuda.get_device_name(current_device)))
        else:
            logger.info("This model will run on CPU")
        # load pretrained checkpoint
        if self.args.pretrained:
            self.load_checkpoint(self.args.saved_checkpoint_file)
        # train
        self.train()
        self.writer.close()

    def train(self):
        for epoch in tqdm(range(self.current_epoch, self.epoch_num),
                          desc="Total {} epochs".format(self.config.epoch_num)):
            self.current_epoch = epoch
            self.train_one_epoch()
            # validate
            PA, MPA, MIoU, FWIoU = self.validate()
            self.writer.add_scalar('PA', PA, self.current_epoch)
            self.writer.add_scalar('MPA', MPA, self.current_epoch)
            self.writer.add_scalar('MIoU', MIoU, self.current_epoch)
            self.writer.add_scalar('FWIoU', FWIoU, self.current_epoch)
            is_best = MIoU > self.best_MIou
            if is_best:
                self.best_MIou = MIoU
            self.save_checkpoint(is_best, self.args.store_checkpoint_name)

    def train_one_epoch(self):
        tqdm_epoch = tqdm(self.dataloader.train_loader,
                          total=self.dataloader.train_iterations,
                          desc="Train Epoch-{}-".format(self.current_epoch + 1))
        logger.info("Training one epoch...")
        self.Eval.reset()
        # Set the model to be in training mode (for batchnorm and dropout)
        train_loss = []
        self.model.train()
        # Initialize your average meters
        batch_idx = 0
        for x, y, _ in tqdm_epoch:
            self.poly_lr_scheduler(
                optimizer=self.optimizer,
                init_lr=self.args.lr,
                iter=self.current_iter,
                max_iter=self.args.iter_max,
                power=self.config.poly_power,
            )
            if self.current_iter >= self.args.iter_max:
                logger.info("iteration reached {}!".format(self.args.iter_max))
                break
            self.writer.add_scalar('learning_rate', self.optimizer.param_groups[0]["lr"], self.current_iter)
            self.writer.add_scalar('learning_rate_10x', self.optimizer.param_groups[1]["lr"], self.current_iter)
            if self.cuda:
                x, y = x.to(self.device), y.to(device=self.device, dtype=torch.long)
            self.optimizer.zero_grad()
            # model
            pred = self.model(x)
            y = torch.squeeze(y, 1)
            # loss
            cur_loss = self.loss(pred, y)
            # optimizer
            cur_loss.backward()
            self.optimizer.step()
            train_loss.append(cur_loss.item())
            if batch_idx % self.config.batch_save == 0:
                logger.info("The train loss of epoch{}-batch-{}:{}".format(self.current_epoch, batch_idx, cur_loss.item()))
            batch_idx += 1
            self.current_iter += 1
            if np.isnan(float(cur_loss.item())):
                raise ValueError('Loss is nan during training...')
            pred = pred.data.cpu().numpy()
            label = y.cpu().numpy()
            argpred = np.argmax(pred, axis=1)
            self.Eval.add_batch(label, argpred)

        PA = self.Eval.Pixel_Accuracy()
        MPA = self.Eval.Mean_Pixel_Accuracy()
        MIoU = self.Eval.Mean_Intersection_over_Union()
        FWIoU = self.Eval.Frequency_Weighted_Intersection_over_Union()
        logger.info('Epoch:{}, train PA1:{}, MPA1:{}, MIoU1:{}, FWIoU1:{}'.format(self.current_epoch, PA, MPA, MIoU, FWIoU))
        tr_loss = sum(train_loss) / len(train_loss)
        self.writer.add_scalar('train_loss', tr_loss, self.current_epoch)
        tqdm.write("The average loss of train epoch-{}-:{}".format(self.current_epoch, tr_loss))
        tqdm_epoch.close()

    def validate(self):
        logger.info('validating one epoch...')
        self.Eval.reset()
        with torch.no_grad():
            tqdm_batch = tqdm(self.dataloader.valid_loader,
                              total=self.dataloader.valid_iterations,
                              desc="Val Epoch-{}-".format(self.current_epoch + 1))
            val_loss = []
            self.model.eval()
            for x, y, id in tqdm_batch:
                if self.cuda:
                    x, y = x.to(self.device), y.to(device=self.device, dtype=torch.long)
                # model
                pred = self.model(x)
                y = torch.squeeze(y, 1)
                cur_loss = self.loss(pred, y)
                if np.isnan(float(cur_loss.item())):
                    raise ValueError('Loss is nan during validating...')
                val_loss.append(cur_loss.item())
                pred = pred.data.cpu().numpy()
                label = y.cpu().numpy()
                argpred = np.argmax(pred, axis=1)
                self.Eval.add_batch(label, argpred)

            PA = self.Eval.Pixel_Accuracy()
            MPA = self.Eval.Mean_Pixel_Accuracy()
            MIoU = self.Eval.Mean_Intersection_over_Union()
            FWIoU = self.Eval.Frequency_Weighted_Intersection_over_Union()
            logger.info('Epoch:{}, validation PA1:{}, MPA1:{}, MIoU1:{}, FWIoU1:{}'.format(self.current_epoch, PA, MPA, MIoU, FWIoU))
            v_loss = sum(val_loss) / len(val_loss)
            logger.info("The average loss of val loss:{}".format(v_loss))
            self.writer.add_scalar('val_loss', v_loss, self.current_epoch)
            tqdm_batch.close()
        return PA, MPA, MIoU, FWIoU

    def save_checkpoint(self, is_best, filename=None):
        """Save checkpoint if a new best is achieved."""
        filename = os.path.join(self.args.checkpoint_dir, filename)
        state = {
            'epoch': self.current_epoch + 1,
            'iteration': self.current_iter,
            'state_dict': self.model.state_dict(),
            'optimizer': self.optimizer.state_dict(),
            'best_MIou': self.best_MIou
        }
        if is_best:
            logger.info("=> saving a new best checkpoint...")
            torch.save(state, filename)
        else:
            logger.info("=> The MIoU of val doesn't improve.")

    def load_checkpoint(self, filename):
        filename = os.path.join(self.args.checkpoint_dir, filename)
        try:
            logger.info("Loading checkpoint '{}'".format(filename))
            checkpoint = torch.load(filename)
            self.model.load_state_dict(checkpoint['state_dict'])
            self.optimizer.load_state_dict(checkpoint['optimizer'])
            self.best_MIou = checkpoint['best_MIou']
            logger.info("Checkpoint loaded successfully from '{}' at (epoch {}) at (iteration {}, MIoU:{})\n"
                        .format(self.args.checkpoint_dir, checkpoint['epoch'],
                                checkpoint['iteration'], checkpoint['best_MIou']))
        except OSError as e:
            logger.info("No checkpoint exists at '{}'. Skipping...".format(self.args.checkpoint_dir))
            logger.info("**First time to train**")

    def get_params(self, model, key):
        # For Dilated CNN
        if key == "1x":
            for m in model.named_modules():
                if "Resnet101" in m[0]:
                    if isinstance(m[1], nn.Conv2d):
                        for p in m[1].parameters():
                            yield p
        if key == "10x":
            for m in model.named_modules():
                if "encoder" in m[0] or "decoder" in m[0]:
                    if isinstance(m[1], nn.Conv2d):
                        for p in m[1].parameters():
                            yield p

    def poly_lr_scheduler(self, optimizer, init_lr, iter, max_iter, power):
        new_lr = init_lr * (1 - float(iter) / max_iter) ** power
        optimizer.param_groups[0]["lr"] = new_lr
        optimizer.param_groups[1]["lr"] = 10 * new_lr
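# A minimal sketch of the poly schedule implemented above: the lr decays from
# init_lr to 0 as iter approaches max_iter (values here are illustrative).
init_lr, max_iter, power = 0.007, 10000, 0.9
for it in [0, 2500, 5000, 7500, 10000]:
    print(it, init_lr * (1 - it / max_iter) ** power)
# The second parameter group (decoder) always tracks 10 * new_lr.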
def __init__(self, args, cuda=None):
    self.args = args
    os.environ["CUDA_VISIBLE_DEVICES"] = self.args.gpu
    self.cuda = cuda and torch.cuda.is_available()
    self.device = torch.device('cuda' if self.cuda else 'cpu')
    self.current_MIoU = 0
    self.best_MIou = 0
    self.current_epoch = 0
    self.current_iter = 0
    # set TensorboardX
    self.writer = SummaryWriter(log_dir=self.args.run_name)
    # Metric definition
    self.Eval = Eval(self.args.num_classes)
    # loss definition
    if self.args.loss_weight_file is not None:
        classes_weights_path = os.path.join(self.args.loss_weights_dir, self.args.loss_weight_file)
        print(classes_weights_path)
        if not os.path.isfile(classes_weights_path):
            logger.info('calculating class weights...')
            calculate_weigths_labels(self.args)
        class_weights = np.load(classes_weights_path)
        pprint.pprint(class_weights)
        weight = torch.from_numpy(class_weights.astype(np.float32))
        logger.info('loading class weights successfully!')
    else:
        weight = None
    self.loss = nn.CrossEntropyLoss(weight=weight, ignore_index=255)
    self.loss.to(self.device)
    # model
    self.model = Unet_decoder(output_stride=self.args.output_stride,
                              class_num=self.args.num_classes,
                              pretrained=self.args.imagenet_pretrained and self.args.pretrained_ckpt_file is None,
                              bn_momentum=self.args.bn_momentum,
                              freeze_bn=self.args.freeze_bn)
    self.model = nn.DataParallel(self.model, device_ids=range(ceil(len(self.args.gpu) / 2)))
    patch_replication_callback(self.model)
    self.model.to(self.device)
    self.optimizer = torch.optim.SGD(
        params=[
            {"params": self.get_params(self.model.module, key="1x"), "lr": self.args.lr},
            {"params": self.get_params(self.model.module, key="10x"), "lr": 10 * self.args.lr},
        ],
        momentum=self.args.momentum,
        weight_decay=self.args.weight_decay,
    )
    # dataloader
    self.dataloader = VOCDataLoader(self.args)
    self.epoch_num = ceil(self.args.iter_max / self.dataloader.train_iterations)
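# A minimal sketch (not the repo's calculate_weigths_labels) of one common way
# to derive log-smoothed class weights from pixel frequencies, saved as the
# .npy file that CrossEntropyLoss consumes above. The 1.02 smoothing constant
# is an assumption borrowed from the ENet weighting scheme.
import numpy as np

def log_class_weights(pixel_counts):
    freq = pixel_counts / pixel_counts.sum()
    return 1.0 / np.log(1.02 + freq)   # rarer classes get larger weights

print(log_class_weights(np.array([1e6, 1e4, 1e2])))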
        lr=config.learning_rate,
        betas=(config.beta_1, config.beta_2),
        eps=config.eps,
        weight_decay=config.weight_decay),
    d_model=config.transformer_embedding_dim,
    warmup_steps=config.warmup_steps,
    factor=config.factor)
criterion_para = CopyLoss(ignore_index=Constants.PAD, reduction='none').to(device_para)
criterion_back = LabelSmoothing(smoothing=config.smoothing, ignore_index=Constants.PAD).to(device_back)
generator = generator(test_data=test_dataloader,
                      source_path=args.test_source[0],
                      eval=Eval(args.test_source[0], args.test_target[0]),
                      word2index=word2index,
                      index2word=index2word,
                      UNK_WORD=Constants.UNK_WORD,
                      PAD=Constants.PAD)
fit = Fit(train_data=train_dataloader,
          para_model=para_model,
          back_model=back_model,
          optim=optim,
          criterion_para=criterion_para,
          criterion_back=criterion_back,
          generator=generator,
          num_rounds=args.num_rounds,
          epoch=args.epoch,
          device_para=device_para,
for epoch in range(1, cfg.num_epoch):
    # Adjust lr according to training epoch progress
    if epoch in [20, 30]:
        lr *= 0.1
        optimizer = torch.optim.Adam(model.parameters(), lr, weight_decay=5e-4)
    model.train()
    for inp, hm, true_mask, ind, wh, offset in tqdm(train_loader):
        inp, hm, true_mask, ind, wh, offset = inp.cuda(), hm.cuda(), true_mask.cuda(), ind.cuda(), wh.cuda(), offset.cuda()
        outputs = model(inp)
        total_loss, hm_loss, wh_loss, off_loss = loss_func(outputs, hm, true_mask, ind, wh, offset)
        total_loss.backward()
        optimizer.step()
        optimizer.zero_grad()
    # save_model(os.path.join(os.path.dirname(__file__), 'weights', '_{}.pth'.format(epoch)), epoch, model, optimizer)

    eval_result = Eval(model=model, test_loader=val_loader)
    ap_table = [["Index", "Class name", "Precision", "Recall", "AP", "F1-score"]]
    for p, r, ap, f1, cls_id in zip(*eval_result):
        ap_table += [[cls_id, cfg.class_name[cls_id], "%.3f" % p, "%.3f" % r, "%.3f" % ap, "%.3f" % f1]]
    print('\n' + AsciiTable(ap_table).table)
    eval_map = round(eval_result[2].mean(), 4)
    print("Epoch %d/%d ---- mAP:%.4f Loss:%.4f" % (epoch, cfg.num_epoch, eval_map, total_loss.item()))
    vis.line(X=np.array([epoch]), Y=np.array([hm_loss.item()]), win='hm',
             update=None if epoch == 1 else 'append', opts={'title': 'hm'})
    vis.line(X=np.array([epoch]), Y=np.array([wh_loss.item()]), win='wh',
             update=None if epoch == 1 else 'append', opts={'title': 'wh'})
    vis.line(X=np.array([epoch]), Y=np.array([off_loss.item()]), win='offset',
             update=None if epoch == 1 else 'append', opts={'title': 'offset'})
    vis.line(X=np.array([epoch]), Y=np.array([eval_map]), win='map',
             update=None if epoch == 1 else 'append', opts={'title': 'map'})
    if eval_map > mAP:
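# Design note, with a sketch that is not from this repo: rebuilding Adam above
# discards its moment estimates at every decay step. Scaling the existing
# param_groups in place keeps the optimizer state while applying the same
# step-decay schedule at epochs 20 and 30.
def decay_lr(optimizer, factor=0.1):
    for g in optimizer.param_groups:
        g['lr'] *= factor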
from utils.eval import Eval

word2index, index2word = load_vocab(args.vocab_path)
source = lang(filelist=args.source, word2index=word2index, PAD=Constants.PAD_WORD)
dataloader = get_dataloader(source=source, batch_size=args.batch_size, shuffle=False)
model = get_vae(vocab_size=len(word2index), device=device, checkpoint_path=args.model_path)
eval = Eval(args.source[0], args.target[0]) if args.target is not None else None
generator = generator(test_data=dataloader,
                      eval=eval,
                      word2index=word2index,
                      index2word=index2word,
                      source_path=args.source[0],
                      UNK_WORD=Constants.UNK_WORD,
                      PAD=Constants.PAD)
generator(model=model,
          max_length=args.max_length,
          num_rounds=args.num_rounds,
          device=device,
          save_path=args.save_path,
          save_info=args.save_path,
class Evaluater():
    def __init__(self, args, cuda=None, train_id=None, logger=None):
        self.args = args
        os.environ["CUDA_VISIBLE_DEVICES"] = self.args.gpu
        self.cuda = cuda and torch.cuda.is_available()
        self.device = torch.device('cuda' if self.cuda else 'cpu')
        self.current_MIoU = 0
        self.best_MIou = 0
        self.current_epoch = 0
        self.current_iter = 0
        self.train_id = train_id
        self.logger = logger

        # set TensorboardX
        self.writer = SummaryWriter(self.args.checkpoint_dir)
        # Metric definition
        self.Eval = Eval(self.args.num_classes)
        # loss definition
        self.loss = nn.CrossEntropyLoss(ignore_index=-1)
        self.loss.to(self.device)
        # model
        self.model, params = get_model(self.args)
        self.model = nn.DataParallel(self.model, device_ids=[0])
        self.model.to(self.device)

        # load pretrained checkpoint
        if self.args.pretrained_ckpt_file is not None:
            path1 = os.path.join(*self.args.checkpoint_dir.split('/')[:-1], self.train_id + 'best.pth')
            path2 = self.args.pretrained_ckpt_file
            if os.path.exists(path1):
                pretrained_ckpt_file = path1
            elif os.path.exists(path2):
                pretrained_ckpt_file = path2
            else:
                raise AssertionError("no pretrained_ckpt_file")
            self.load_checkpoint(pretrained_ckpt_file)

        # dataloader
        self.dataloader = City_DataLoader(self.args) if self.args.dataset == "cityscapes" else GTA5_DataLoader(self.args)
        if self.args.city_name != "None":
            target_data_set = CrossCity_Dataset(self.args,
                                                data_root_path=self.args.data_root_path,
                                                list_path=self.args.list_path,
                                                split='val',
                                                base_size=self.args.target_base_size,
                                                crop_size=self.args.target_crop_size,
                                                class_13=self.args.class_13)
            self.target_val_dataloader = data.DataLoader(target_data_set,
                                                         batch_size=self.args.batch_size,
                                                         shuffle=False,
                                                         num_workers=self.args.data_loader_workers,
                                                         pin_memory=self.args.pin_memory,
                                                         drop_last=True)
            self.dataloader.val_loader = self.target_val_dataloader
            self.dataloader.valid_iterations = (len(target_data_set) + self.args.batch_size) // self.args.batch_size
        else:
            self.dataloader.val_loader = self.dataloader.data_loader
            self.dataloader.valid_iterations = min(self.dataloader.num_iterations, 500)
        self.epoch_num = ceil(self.args.iter_max / self.dataloader.num_iterations)

    def main(self):
        # choose cuda
        if self.cuda:
            current_device = torch.cuda.current_device()
            self.logger.info("This model will run on {}".format(torch.cuda.get_device_name(current_device)))
        else:
            self.logger.info("This model will run on CPU")
        # validate
        self.validate()
        self.writer.close()

    def validate(self):
        self.logger.info('validating one epoch...')
        self.Eval.reset()
        with torch.no_grad():
            tqdm_batch = tqdm(self.dataloader.val_loader,
                              total=self.dataloader.valid_iterations,
                              desc="Val Epoch-{}-".format(self.current_epoch + 1))
            val_loss = []
            self.model.eval()
            i = 0
            for x, y, id in tqdm_batch:
                i += 1
                if self.cuda:
                    x, y = x.to(self.device), y.to(device=self.device, dtype=torch.long)
                # model
                pred = self.model(x)
                if isinstance(pred, tuple):
                    pred_2 = pred[1]
                    pred = pred[0]
                y = torch.squeeze(y, 1)
                if self.args.flip:
                    pred_P = F.softmax(pred, dim=1)

                    def flip(x, dim):
                        dim = x.dim() + dim if dim < 0 else dim
                        inds = tuple(slice(None, None) if i != dim
                                     else x.new(torch.arange(x.size(i) - 1, -1, -1).tolist()).long()
                                     for i in range(x.dim()))
                        return x[inds]

                    x_flip = flip(x, -1)
                    pred_flip = self.model(x_flip)
                    if isinstance(pred_flip, tuple):
                        pred_flip = pred_flip[0]
                    pred_P_flip = F.softmax(pred_flip, dim=1)
                    pred_P_2 = flip(pred_P_flip, -1)
                    pred_c = (pred_P + pred_P_2) / 2
                    pred = pred_c.data.cpu().numpy()
                else:
                    pred = pred.data.cpu().numpy()
                label = y.cpu().numpy()
                argpred = np.argmax(pred, axis=1)
                self.Eval.add_batch(label, argpred)
                if i == self.dataloader.valid_iterations:
                    break
                if i % 20 == 0 and self.args.image_summary:
                    # show val result on tensorboard
                    images_inv = inv_preprocess(x.clone().cpu(), self.args.show_num_images,
                                                numpy_transform=self.args.numpy_transform)
                    labels_colors = decode_labels(label, self.args.show_num_images)
                    preds_colors = decode_labels(argpred, self.args.show_num_images)
                    for index, (img, lab, color_pred) in enumerate(zip(images_inv, labels_colors, preds_colors)):
                        self.writer.add_image('eval/' + str(index) + '/Images', img, self.current_epoch)
                        self.writer.add_image('eval/' + str(index) + '/Labels', lab, self.current_epoch)
                        self.writer.add_image('eval/' + str(index) + '/preds', color_pred, self.current_epoch)

            # show val result on tensorboard
            if self.args.image_summary:
                images_inv = inv_preprocess(x.clone().cpu(), self.args.show_num_images,
                                            numpy_transform=self.args.numpy_transform)
                labels_colors = decode_labels(label, self.args.show_num_images)
                preds_colors = decode_labels(argpred, self.args.show_num_images)
                for index, (img, lab, color_pred) in enumerate(zip(images_inv, labels_colors, preds_colors)):
                    self.writer.add_image('0Images/' + str(index), img, self.current_epoch)
                    self.writer.add_image('a' + str(index) + '/Labels', lab, self.current_epoch)
                    self.writer.add_image('a' + str(index) + '/preds', color_pred, self.current_epoch)
                    if self.args.multi:
                        self.writer.add_image('a' + str(index) + '/preds_2', preds_colors_2[index], self.current_epoch)
                        self.writer.add_image('a' + str(index) + '/preds_c', preds_colors_c[index], self.current_epoch)

            # get eval result
            if self.args.class_16:
                def val_info(Eval, name):
                    PA = Eval.Pixel_Accuracy()
                    MPA_16, MPA_13 = Eval.Mean_Pixel_Accuracy()
                    MIoU_16, MIoU_13 = Eval.Mean_Intersection_over_Union()
                    FWIoU_16, FWIoU_13 = Eval.Frequency_Weighted_Intersection_over_Union()
                    PC_16, PC_13 = Eval.Mean_Precision()
                    print("########## Eval{} ############".format(name))
                    self.logger.info('\nEpoch:{:.3f}, {} PA:{:.3f}, MPA_16:{:.3f}, MIoU_16:{:.3f}, FWIoU_16:{:.3f}, PC_16:{:.3f}'
                                     .format(self.current_epoch, name, PA, MPA_16, MIoU_16, FWIoU_16, PC_16))
                    self.logger.info('\nEpoch:{:.3f}, {} PA:{:.3f}, MPA_13:{:.3f}, MIoU_13:{:.3f}, FWIoU_13:{:.3f}, PC_13:{:.3f}'
                                     .format(self.current_epoch, name, PA, MPA_13, MIoU_13, FWIoU_13, PC_13))
                    return PA, MPA_13, MIoU_13, FWIoU_13
            else:
                def val_info(Eval, name):
                    PA = Eval.Pixel_Accuracy()
                    MPA = Eval.Mean_Pixel_Accuracy()
                    MIoU = Eval.Mean_Intersection_over_Union()
                    FWIoU = Eval.Frequency_Weighted_Intersection_over_Union()
                    PC = Eval.Mean_Precision()
                    print("########## Eval{} ############".format(name))
                    self.logger.info('\nEpoch:{:.3f}, {} PA1:{:.3f}, MPA1:{:.3f}, MIoU1:{:.3f}, FWIoU1:{:.3f}, PC:{:.3f}'
                                     .format(self.current_epoch, name, PA, MPA, MIoU, FWIoU, PC))
                    return PA, MPA, MIoU, FWIoU

            PA, MPA, MIoU, FWIoU = val_info(self.Eval, "")
            name_classes, mIoUs = self.Eval.Print_Every_class_Eval()
            row1 = []
            row2 = []
            with open(self.args.checkpoint_dir + '/mIoU.csv', 'a') as csvFile:
                writer = csv.writer(csvFile)
                for ind_class in range(len(name_classes) - 1):
                    row1.append(name_classes[ind_class])
                    row2.append(round(mIoUs[ind_class] * 100, 2))
                row1.extend(['mIoU'])
                row2.extend([round(np.nanmean(MIoU) * 100, 2)])
                writer.writerow(row1)
                writer.writerow(row2)
            tqdm_batch.close()
        return PA, MPA, MIoU, FWIoU

    def load_checkpoint(self, filename):
        try:
            self.logger.info("Loading checkpoint '{}'".format(filename))
            checkpoint = torch.load(filename)
            if 'state_dict' in checkpoint:
                self.model.load_state_dict(checkpoint['state_dict'])
            else:
                self.model.module.load_state_dict(checkpoint)
            self.logger.info("Checkpoint loaded successfully from " + filename)
            if 'crop_size' in checkpoint:
                self.args.crop_size = checkpoint['crop_size']
                print(checkpoint['crop_size'], self.args.crop_size)
        except OSError as e:
            self.logger.info("No checkpoint exists at '{}'. Skipping...".format(filename))
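# A minimal sketch of the flip test-time augmentation used in validate() above,
# written with torch.flip (equivalent to the index-based flip helper).
import torch
import torch.nn.functional as F

def flip_tta(model, x):
    p = F.softmax(model(x), dim=1)                        # (N, C, H, W)
    p_flip = F.softmax(model(torch.flip(x, dims=[3])), dim=1)  # flip width axis
    return (p + torch.flip(p_flip, dims=[3])) / 2         # average the two views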
def __init__(self, args, cuda=None, train_id="None", logger=None): self.args = args self.cuda = cuda and torch.cuda.is_available() self.device = torch.device('cuda' if self.cuda else 'cpu') self.train_id = train_id self.logger = logger self.current_MIoU = 0 self.best_MIou = 0 self.best_source_MIou = 0 self.current_epoch = 0 self.current_iter = 0 self.second_best_MIou = 0 # set TensorboardX self.writer = SummaryWriter(self.args.checkpoint_dir) # Metric definition self.Eval = Eval(self.args.num_classes) # loss definition self.loss = nn.CrossEntropyLoss(weight=None, ignore_index= -1) self.loss.to(self.device) # model self.model, params = get_model(self.args) self.model = nn.DataParallel(self.model) self.model.to(self.device) # support for FCN8s if self.args.backbone == "fcn8s_vgg" and self.args.optim == "SGD": self.args.optim = "Adam" print('WARNING: FCN8s requires Adam optimizer, but SGD was set. Switching to Adam.') if self.args.optim == "SGD": self.optimizer = torch.optim.SGD( params=params, momentum=self.args.momentum, weight_decay=self.args.weight_decay ) elif self.args.optim == "Adam": self.optimizer = torch.optim.Adam(params, betas=(0.9, 0.99), weight_decay=self.args.weight_decay) # dataloader if DEBUG: print('DEBUG: Loading training and validation datasets (for UDA only val one is used, but it is overwritten)') if self.args.dataset=="cityscapes": self.dataloader = City_DataLoader(self.args) elif self.args.dataset=="gta5": self.dataloader = GTA5_DataLoader(self.args) else: self.dataloader = SYNTHIA_DataLoader(self.args) ### use_target_val = True if use_target_val: if DEBUG: print('DEBUG: Overwriting validation set, using target set instead of source one') target_data_set = City_Dataset(args, data_root_path=datasets_path['cityscapes']['data_root_path'], list_path=datasets_path['cityscapes']['list_path'], split='test', base_size=args.target_base_size, crop_size=args.target_crop_size, class_16=args.class_16) self.target_val_dataloader = data.DataLoader(target_data_set, batch_size=self.args.batch_size, shuffle=False, num_workers=self.args.data_loader_workers, pin_memory=self.args.pin_memory, drop_last=True) self.dataloader.val_loader = self.target_val_dataloader ### self.dataloader.num_iterations = min(self.dataloader.num_iterations, ITER_MAX) self.epoch_num = ceil(self.args.iter_max / self.dataloader.num_iterations) if self.args.iter_stop is None else \ ceil(self.args.iter_stop / self.dataloader.num_iterations)
class Trainer():
    def __init__(self, cfg, logger, writer):
        # Args
        self.cfg = cfg
        self.device = torch.device('cuda')
        self.logger = logger
        self.writer = writer

        # Counters
        self.epoch = 0
        self.iter = 0
        self.current_MIoU = 0
        self.best_MIou = 0
        self.best_source_MIou = 0

        # Metrics
        self.evaluator = Eval(self.cfg.data.num_classes)

        # Loss
        self.ignore_index = -1
        self.loss = nn.CrossEntropyLoss(ignore_index=self.ignore_index)

        # Model
        self.model, params = get_model(self.cfg)
        # self.model = nn.DataParallel(self.model, device_ids=[0])  # TODO: test multi-gpu
        self.model.to(self.device)

        # EMA
        self.ema = EMA(self.model, self.cfg.ema_decay)

        # Optimizer
        if self.cfg.opt.kind == "SGD":
            self.optimizer = torch.optim.SGD(params,
                                             momentum=self.cfg.opt.momentum,
                                             weight_decay=self.cfg.opt.weight_decay)
        elif self.cfg.opt.kind == "Adam":
            self.optimizer = torch.optim.Adam(params, betas=(0.9, 0.99),
                                              weight_decay=self.cfg.opt.weight_decay)
        else:
            raise NotImplementedError()
        self.lr_factor = 10

        # Source
        if self.cfg.data.source.dataset == 'synthia':
            source_train_dataset = SYNTHIA_Dataset(split='train', **self.cfg.data.source.kwargs)
            source_val_dataset = SYNTHIA_Dataset(split='val', **self.cfg.data.source.kwargs)
        elif self.cfg.data.source.dataset == 'gta5':
            source_train_dataset = GTA5_Dataset(split='train', **self.cfg.data.source.kwargs)
            source_val_dataset = GTA5_Dataset(split='val', **self.cfg.data.source.kwargs)
        else:
            raise NotImplementedError()
        self.source_dataloader = DataLoader(source_train_dataset, shuffle=True, drop_last=True,
                                            **self.cfg.data.loader.kwargs)
        self.source_val_dataloader = DataLoader(source_val_dataset, shuffle=False, drop_last=False,
                                                **self.cfg.data.loader.kwargs)

        # Target
        if self.cfg.data.target.dataset == 'cityscapes':
            target_train_dataset = City_Dataset(split='train', **self.cfg.data.target.kwargs)
            target_val_dataset = City_Dataset(split='val', **self.cfg.data.target.kwargs)
        else:
            raise NotImplementedError()
        self.target_dataloader = DataLoader(target_train_dataset, shuffle=True, drop_last=True,
                                            **self.cfg.data.loader.kwargs)
        self.target_val_dataloader = DataLoader(target_val_dataset, shuffle=False, drop_last=False,
                                                **self.cfg.data.loader.kwargs)

        # Perturbations
        if self.cfg.lam_aug > 0:
            self.aug = get_augmentation()

    def train(self):
        # Loop over epochs
        self.continue_training = True
        while self.continue_training:

            # Train for a single epoch
            self.train_one_epoch()

            # Use EMA params to evaluate performance
            self.ema.apply_shadow()
            self.ema.model.eval()
            self.ema.model.cuda()

            # Validate on source (if possible) and target
            if self.cfg.data.source_val_iterations > 0:
                self.validate(mode='source')
            PA, MPA, MIoU, FWIoU = self.validate()

            # Restore current (non-EMA) params for training
            self.ema.restore()

            # Log val results
            self.writer.add_scalar('PA', PA, self.epoch)
            self.writer.add_scalar('MPA', MPA, self.epoch)
            self.writer.add_scalar('MIoU', MIoU, self.epoch)
            self.writer.add_scalar('FWIoU', FWIoU, self.epoch)

            # Save checkpoint if new best model
            self.current_MIoU = MIoU
            is_best = MIoU > self.best_MIou
            if is_best:
                self.best_MIou = MIoU
                self.best_iter = self.iter
                self.logger.info("=> Saving a new best checkpoint...")
                self.logger.info("=> The best val MIoU is now {:.3f} from iter {}".format(self.best_MIou, self.best_iter))
                self.save_checkpoint('best.pth')
            else:
                self.logger.info("=> The MIoU of val did not improve.")
                self.logger.info("=> The best val MIoU is still {:.3f} from iter {}".format(self.best_MIou, self.best_iter))
            self.epoch += 1

        # Save final checkpoint
        self.logger.info("=> The best MIoU was {:.3f} at iter {}".format(self.best_MIou, self.best_iter))
        self.logger.info("=> Saving the final checkpoint to {}".format('final.pth'))
        self.save_checkpoint('final.pth')

    def train_one_epoch(self):
        # Load and reset
        self.model.train()
        self.evaluator.reset()

        # Helper
        def unpack(x):
            return (x[0], x[1]) if isinstance(x, tuple) else (x, None)

        # Training loop
        total = min(len(self.source_dataloader), len(self.target_dataloader))
        for batch_idx, (batch_s, batch_t) in enumerate(
                tqdm(zip(self.source_dataloader, self.target_dataloader),
                     total=total, desc=f"Epoch {self.epoch + 1}")):

            # Learning rate
            self.poly_lr_scheduler(optimizer=self.optimizer)
            self.writer.add_scalar('train/lr', self.optimizer.param_groups[0]["lr"], self.iter)

            # Losses
            losses = {}

            ##########################
            # Source supervised loss #
            ##########################
            x, y, _ = batch_s

            # Data
            x = x.to(self.device)
            y = y.squeeze(dim=1).to(device=self.device, dtype=torch.long, non_blocking=True)

            # Fourier mix: source --> target
            if self.cfg.source_fourier:
                x = fourier_mix(src_images=x, tgt_images=batch_t[0].to(self.device), L=self.cfg.fourier_beta)

            # Forward
            pred = self.model(x)
            pred_1, pred_2 = unpack(pred)

            # Loss (source)
            loss_source_1 = self.loss(pred_1, y)
            if self.cfg.aux:
                loss_source_2 = self.loss(pred_2, y) * self.cfg.lam_aux
                loss_source = loss_source_1 + loss_source_2
            else:
                loss_source = loss_source_1

            # Backward
            loss_source.backward()

            # Clean up
            losses['source_main'] = loss_source_1.cpu().item()
            if self.cfg.aux:
                losses['source_aux'] = loss_source_2.cpu().item()
                del loss_source_2
            del x, y, loss_source, loss_source_1

            ######################
            # Target Pseudolabel #
            ######################
            x, _, _ = batch_t
            x = x.to(self.device)

            # First step: run non-augmented image through model to get predictions
            with torch.no_grad():

                # Substep 1: forward pass
                pred = self.model(x.to(self.device))
                pred_1, pred_2 = unpack(pred)

                # Substep 2: convert soft predictions to hard predictions
                pred_P_1 = F.softmax(pred_1, dim=1)
                label_1 = torch.argmax(pred_P_1.detach(), dim=1)
                maxpred_1, argpred_1 = torch.max(pred_P_1.detach(), dim=1)
                T = self.cfg.pseudolabel_threshold
                mask_1 = (maxpred_1 > T)
                ignore_tensor = torch.ones(1).to(self.device, dtype=torch.long) * self.ignore_index
                label_1 = torch.where(mask_1, label_1, ignore_tensor)
                if self.cfg.aux:
                    pred_P_2 = F.softmax(pred_2, dim=1)
                    maxpred_2, argpred_2 = torch.max(pred_P_2.detach(), dim=1)
                    pred_c = (pred_P_1 + pred_P_2) / 2
                    maxpred_c, argpred_c = torch.max(pred_c, dim=1)
                    mask = (maxpred_1 > T) | (maxpred_2 > T)
                    label_2 = torch.where(mask, argpred_c, ignore_tensor)

            ############
            # Aug loss #
            ############
            if self.cfg.lam_aug > 0:

                # Second step: augment image and label
                x_aug, y_aug_1 = augment(images=x.cpu(), labels=label_1.detach().cpu(), aug=self.aug)
                y_aug_1 = y_aug_1.to(device=self.device, non_blocking=True)
                if self.cfg.aux:
                    _, y_aug_2 = augment(images=x.cpu(), labels=label_2.detach().cpu(), aug=self.aug)
                    y_aug_2 = y_aug_2.to(device=self.device, non_blocking=True)

                # Third step: run augmented image through model to get predictions
                pred_aug = self.model(x_aug.to(self.device))
                pred_aug_1, pred_aug_2 = unpack(pred_aug)

                # Fourth step: calculate loss
                loss_aug_1 = self.loss(pred_aug_1, y_aug_1) * self.cfg.lam_aug
                if self.cfg.aux:
                    loss_aug_2 = self.loss(pred_aug_2, y_aug_2) * self.cfg.lam_aug * self.cfg.lam_aux
                    loss_aug = loss_aug_1 + loss_aug_2
                else:
                    loss_aug = loss_aug_1

                # Backward
                loss_aug.backward()

                # Clean up
                losses['aug_main'] = loss_aug_1.cpu().item()
                if self.cfg.aux:
                    losses['aug_aux'] = loss_aug_2.cpu().item()
                    del loss_aug_2
                del pred_aug, pred_aug_1, pred_aug_2, loss_aug, loss_aug_1

            ################
            # Fourier Loss #
            ################
            if self.cfg.lam_fourier > 0:

                # Second step: fourier mix
                x_fourier = fourier_mix(src_images=x.to(self.device),
                                        tgt_images=batch_s[0].to(self.device),
                                        L=self.cfg.fourier_beta)

                # Third step: run mixed image through model to get predictions
                pred_fourier = self.model(x_fourier.to(self.device))
                pred_fourier_1, pred_fourier_2 = unpack(pred_fourier)

                # Fourth step: calculate loss
                loss_fourier_1 = self.loss(pred_fourier_1, label_1) * self.cfg.lam_fourier
                if self.cfg.aux:
                    loss_fourier_2 = self.loss(pred_fourier_2, label_2) * self.cfg.lam_fourier * self.cfg.lam_aux
                    loss_fourier = loss_fourier_1 + loss_fourier_2
                else:
                    loss_fourier = loss_fourier_1

                # Backward
                loss_fourier.backward()

                # Clean up
                losses['fourier_main'] = loss_fourier_1.cpu().item()
                if self.cfg.aux:
                    losses['fourier_aux'] = loss_fourier_2.cpu().item()
                    del loss_fourier_2
                del pred_fourier, pred_fourier_1, pred_fourier_2, loss_fourier, loss_fourier_1

            ###############
            # CutMix Loss #
            ###############
            if self.cfg.lam_cutmix > 0:

                # Second step: CutMix
                x_cutmix, y_cutmix = cutmix_combine(images_1=x,
                                                    labels_1=label_1.unsqueeze(dim=1),
                                                    images_2=batch_s[0].to(self.device),
                                                    labels_2=batch_s[1].unsqueeze(dim=1).to(self.device, dtype=torch.long))
                y_cutmix = y_cutmix.squeeze(dim=1)

                # Third step: run mixed image through model to get predictions
                pred_cutmix = self.model(x_cutmix)
                pred_cutmix_1, pred_cutmix_2 = unpack(pred_cutmix)

                # Fourth step: calculate loss
                loss_cutmix_1 = self.loss(pred_cutmix_1, y_cutmix) * self.cfg.lam_cutmix
                if self.cfg.aux:
                    loss_cutmix_2 = self.loss(pred_cutmix_2, y_cutmix) * self.cfg.lam_cutmix * self.cfg.lam_aux
                    loss_cutmix = loss_cutmix_1 + loss_cutmix_2
                else:
                    loss_cutmix = loss_cutmix_1

                # Backward
                loss_cutmix.backward()

                # Clean up
                losses['cutmix_main'] = loss_cutmix_1.cpu().item()
                if self.cfg.aux:
                    losses['cutmix_aux'] = loss_cutmix_2.cpu().item()
                    del loss_cutmix_2
                del pred_cutmix, pred_cutmix_1, pred_cutmix_2, loss_cutmix, loss_cutmix_1

            # Step optimizer if accumulated enough gradients
            self.optimizer.step()
            self.optimizer.zero_grad()

            # Update model EMA parameters each step
            self.ema.update_params()

            # Calculate total loss
            total_loss = sum(losses.values())

            # Log main losses
            for name, loss in losses.items():
                self.writer.add_scalar(f'train/{name}', loss, self.iter)

            # Log
            if batch_idx % 100 == 0:
                log_string = f"[Epoch {self.epoch}]\t"
                log_string += '\t'.join([f'{n}: {l:.3f}' for n, l in losses.items()])
                self.logger.info(log_string)

            # Increment global iteration counter
            self.iter += 1

            # End training after finishing iterations
            if self.iter > self.cfg.opt.iterations:
                self.continue_training = False
                return

        # After each epoch, update model EMA buffers (i.e. batch norm stats)
        self.ema.update_buffer()

    @torch.no_grad()
    def validate(self, mode='target'):
        """Validate on target"""
        self.logger.info('Validating')
        self.evaluator.reset()
        self.model.eval()

        # Select dataloader
        if mode == 'target':
            val_loader = self.target_val_dataloader
        elif mode == 'source':
            val_loader = self.source_val_dataloader
        else:
            raise NotImplementedError()

        # Loop
        for val_idx, (x, y, id) in enumerate(tqdm(val_loader, desc=f"Val Epoch {self.epoch + 1}")):
            if mode == 'source' and val_idx >= self.cfg.data.source_val_iterations:
                break

            # Forward
            x = x.to(self.device)
            y = y.to(device=self.device, dtype=torch.long)
            pred = self.model(x)
            if isinstance(pred, tuple):
                pred = pred[0]

            # Convert to numpy
            label = y.squeeze(dim=1).cpu().numpy()
            argpred = np.argmax(pred.data.cpu().numpy(), axis=1)

            # Add to evaluator
            self.evaluator.add_batch(label, argpred)

        # Tensorboard images
        vis_imgs = 2
        images_inv = inv_preprocess(x.clone().cpu(), vis_imgs, numpy_transform=True)
        labels_colors = decode_labels(label, vis_imgs)
        preds_colors = decode_labels(argpred, vis_imgs)
        for index, (img, lab, predc) in enumerate(zip(images_inv, labels_colors, preds_colors)):
            self.writer.add_image(str(index) + '/images', img, self.epoch)
            self.writer.add_image(str(index) + '/labels', lab, self.epoch)
            self.writer.add_image(str(index) + '/preds', predc, self.epoch)

        # Calculate and log
        if self.cfg.data.source.kwargs.class_16:
            PA = self.evaluator.Pixel_Accuracy()
            MPA_16, MPA_13 = self.evaluator.Mean_Pixel_Accuracy()
            MIoU_16, MIoU_13 = self.evaluator.Mean_Intersection_over_Union()
            FWIoU_16, FWIoU_13 = self.evaluator.Frequency_Weighted_Intersection_over_Union()
            PC_16, PC_13 = self.evaluator.Mean_Precision()
            self.logger.info('Epoch:{:.3f}, PA:{:.3f}, MPA_16:{:.3f}, MIoU_16:{:.3f}, FWIoU_16:{:.3f}, PC_16:{:.3f}'
                             .format(self.epoch, PA, MPA_16, MIoU_16, FWIoU_16, PC_16))
            self.logger.info('Epoch:{:.3f}, PA:{:.3f}, MPA_13:{:.3f}, MIoU_13:{:.3f}, FWIoU_13:{:.3f}, PC_13:{:.3f}'
                             .format(self.epoch, PA, MPA_13, MIoU_13, FWIoU_13, PC_13))
            self.writer.add_scalar('PA', PA, self.epoch)
            self.writer.add_scalar('MPA_16', MPA_16, self.epoch)
            self.writer.add_scalar('MIoU_16', MIoU_16, self.epoch)
            self.writer.add_scalar('FWIoU_16', FWIoU_16, self.epoch)
            self.writer.add_scalar('MPA_13', MPA_13, self.epoch)
            self.writer.add_scalar('MIoU_13', MIoU_13, self.epoch)
            self.writer.add_scalar('FWIoU_13', FWIoU_13, self.epoch)
            PA, MPA, MIoU, FWIoU = PA, MPA_13, MIoU_13, FWIoU_13
        else:
            PA = self.evaluator.Pixel_Accuracy()
            MPA = self.evaluator.Mean_Pixel_Accuracy()
            MIoU = self.evaluator.Mean_Intersection_over_Union()
            FWIoU = self.evaluator.Frequency_Weighted_Intersection_over_Union()
            PC = self.evaluator.Mean_Precision()
            self.logger.info('Epoch:{:.3f}, PA1:{:.3f}, MPA1:{:.3f}, MIoU1:{:.3f}, FWIoU1:{:.3f}, PC:{:.3f}'
                             .format(self.epoch, PA, MPA, MIoU, FWIoU, PC))
            self.writer.add_scalar('PA', PA, self.epoch)
            self.writer.add_scalar('MPA', MPA, self.epoch)
            self.writer.add_scalar('MIoU', MIoU, self.epoch)
            self.writer.add_scalar('FWIoU', FWIoU, self.epoch)

        return PA, MPA, MIoU, FWIoU

    def save_checkpoint(self, filename='checkpoint.pth'):
        torch.save({
            'epoch': self.epoch + 1,
            'iter': self.iter,
            'state_dict': self.ema.model.state_dict(),
            'shadow': self.ema.shadow,
            'optimizer': self.optimizer.state_dict(),
            'best_MIou': self.best_MIou
        }, filename)

    def load_checkpoint(self, filename):
        checkpoint = torch.load(filename, map_location='cpu')

        # Get model state dict
        if not self.cfg.train and 'shadow' in checkpoint:
            state_dict = checkpoint['shadow']
        elif 'state_dict' in checkpoint:
            state_dict = checkpoint['state_dict']
        else:
            state_dict = checkpoint

        # Remove DP/DDP prefix if it exists
        state_dict = {k.replace('module.', ''): v for k, v in state_dict.items()}

        # Load state dict
        if hasattr(self.model, 'module'):
            self.model.module.load_state_dict(state_dict)
        else:
            self.model.load_state_dict(state_dict)
        self.logger.info(f"Model loaded successfully from {filename}")

        # Load optimizer and epoch
        if self.cfg.train and self.cfg.model.resume_from_checkpoint:
            if 'optimizer' in checkpoint:
                self.optimizer.load_state_dict(checkpoint['optimizer'])
                self.logger.info(f"Optimizer loaded successfully from {filename}")
            if 'epoch' in checkpoint and 'iter' in checkpoint:
                self.epoch = checkpoint['epoch']
                self.iter = checkpoint['iter'] if 'iter' in checkpoint else checkpoint['iteration']
                self.logger.info(f"Resuming training from epoch {self.epoch} iter {self.iter}")
        else:
            self.logger.info("Did not resume optimizer")

    def poly_lr_scheduler(self, optimizer, init_lr=None, iter=None, max_iter=None, power=None):
        init_lr = self.cfg.opt.lr if init_lr is None else init_lr
        iter = self.iter if iter is None else iter
        max_iter = self.cfg.opt.iterations if max_iter is None else max_iter
        power = self.cfg.opt.poly_power if power is None else power
        new_lr = init_lr * (1 - float(iter) / max_iter) ** power
        optimizer.param_groups[0]["lr"] = new_lr
        if len(optimizer.param_groups) == 2:
            optimizer.param_groups[1]["lr"] = 10 * new_lr
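# A minimal sketch of the confidence-thresholded pseudo-labelling performed in
# train_one_epoch() above: pixels whose max softmax probability falls below T
# are assigned ignore_index so CrossEntropyLoss skips them (shapes illustrative).
import torch
import torch.nn.functional as F

logits = torch.randn(2, 19, 4, 4)                 # (N, C, H, W)
prob = F.softmax(logits, dim=1)
conf, label = prob.max(dim=1)                     # per-pixel confidence and class
T, ignore_index = 0.9, -1
pseudo = torch.where(conf > T, label, torch.full_like(label, ignore_index))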
class Trainer(): def __init__(self, args, cuda=None, train_id="None", logger=None): self.args = args os.environ["CUDA_VISIBLE_DEVICES"] = self.args.gpu self.cuda = cuda and torch.cuda.is_available() self.device = torch.device('cuda' if self.cuda else 'cpu') self.train_id = train_id self.logger = logger self.current_MIoU = 0 self.best_MIou = 0 self.best_source_MIou = 0 self.current_epoch = 0 self.current_iter = 0 self.second_best_MIou = 0 # set TensorboardX self.writer = SummaryWriter(self.args.checkpoint_dir) # Metric definition self.Eval = Eval(self.args.num_classes) # loss definition self.loss = nn.CrossEntropyLoss(weight=None, ignore_index= -1) self.loss.to(self.device) # model self.model, params = get_model(self.args) self.model = nn.DataParallel(self.model, device_ids=[0]) self.model.to(self.device) if self.args.optim == "SGD": self.optimizer = torch.optim.SGD( params=params, momentum=self.args.momentum, weight_decay=self.args.weight_decay ) elif self.args.optim == "Adam": self.optimizer = torch.optim.Adam(params, betas=(0.9, 0.99), weight_decay=self.args.weight_decay) # dataloader if self.args.dataset=="cityscapes": self.dataloader = City_DataLoader(self.args) elif self.args.dataset=="gta5": self.dataloader = GTA5_DataLoader(self.args) else: self.dataloader = SYNTHIA_DataLoader(self.args) self.dataloader.num_iterations = min(self.dataloader.num_iterations, ITER_MAX) print(self.args.iter_max, self.dataloader.num_iterations) self.epoch_num = ceil(self.args.iter_max / self.dataloader.num_iterations) if self.args.iter_stop is None else \ ceil(self.args.iter_stop / self.dataloader.num_iterations) def main(self): # display args details self.logger.info("Global configuration as follows:") for key, val in vars(self.args).items(): self.logger.info("{:16} {}".format(key, val)) # choose cuda if self.cuda: current_device = torch.cuda.current_device() self.logger.info("This model will run on {}".format(torch.cuda.get_device_name(current_device))) else: self.logger.info("This model will run on CPU") # load pretrained checkpoint if self.args.pretrained_ckpt_file is not None: if os.path.isdir(self.args.pretrained_ckpt_file): self.args.pretrained_ckpt_file = os.path.join(self.args.checkpoint_dir, self.train_id + 'best.pth') self.load_checkpoint(self.args.pretrained_ckpt_file) if self.args.continue_training: self.load_checkpoint(os.path.join(self.args.checkpoint_dir, self.train_id + 'best.pth')) self.best_iter = self.current_iter self.best_source_iter = self.current_iter else: self.current_epoch = 0 # train self.train() self.writer.close() def train(self): # self.validate() # check image summary for epoch in tqdm(range(self.current_epoch, self.epoch_num), desc="Total {} epochs".format(self.epoch_num)): self.train_one_epoch() # validate PA, MPA, MIoU, FWIoU = self.validate() self.writer.add_scalar('PA', PA, self.current_epoch) self.writer.add_scalar('MPA', MPA, self.current_epoch) self.writer.add_scalar('MIoU', MIoU, self.current_epoch) self.writer.add_scalar('FWIoU', FWIoU, self.current_epoch) self.current_MIoU = MIoU is_best = MIoU > self.best_MIou if is_best: self.best_MIou = MIoU self.best_iter = self.current_iter self.logger.info("=>saving a new best checkpoint...") self.save_checkpoint(self.train_id+'best.pth') else: self.logger.info("=> The MIoU of val does't improve.") self.logger.info("=> The best MIoU of val is {} at {}".format(self.best_MIou, self.best_iter)) self.current_epoch += 1 state = { 'epoch': self.current_epoch + 1, 'iteration': self.current_iter, 'state_dict': 
self.model.state_dict(), 'optimizer': self.optimizer.state_dict(), 'best_MIou': self.current_MIoU } self.logger.info("=>best_MIou {} at {}".format(self.best_MIou, self.best_iter)) self.logger.info("=>saving the final checkpoint to " + os.path.join(self.args.checkpoint_dir, self.train_id+'final.pth')) self.save_checkpoint(self.train_id+'final.pth') def train_one_epoch(self): tqdm_epoch = tqdm(self.dataloader.data_loader, total=self.dataloader.num_iterations, desc="Train Epoch-{}-total-{}".format(self.current_epoch+1, self.epoch_num)) self.logger.info("Training one epoch...") self.Eval.reset() train_loss = [] loss_seg_value_2 = 0 iter_num = self.dataloader.num_iterations if self.args.freeze_bn: self.model.eval() self.logger.info("freeze bacth normalization successfully!") else: self.model.train() # Initialize your average meters batch_idx = 0 for x, y, _ in tqdm_epoch: self.poly_lr_scheduler( optimizer=self.optimizer, init_lr=self.args.lr, iter=self.current_iter, max_iter=self.args.iter_max, power=self.args.poly_power, ) if self.args.iter_stop is not None and self.current_iter >= self.args.iter_stop: self.logger.info("iteration arrive {}(early stop)/{}(total step)!".format(self.args.iter_stop, self.args.iter_max)) break if self.current_iter >= self.args.iter_max: self.logger.info("iteration arrive {}!".format(self.args.iter_max)) break self.writer.add_scalar('learning_rate', self.optimizer.param_groups[0]["lr"], self.current_iter) if self.cuda: x, y = x.to(self.device), y.to(device=self.device, dtype=torch.long) y = torch.squeeze(y, 1) self.optimizer.zero_grad() # model pred = self.model(x) if isinstance(pred, tuple): pred_2 = pred[1] pred = pred[0] # loss cur_loss = self.loss(pred, y) if self.args.multi: loss_2 = self.args.lambda_seg * self.loss(pred_2, y) cur_loss += loss_2 loss_seg_value_2 += loss_2.cpu().item() / iter_num # optimizer cur_loss.backward() self.optimizer.step() train_loss.append(cur_loss.item()) if batch_idx % 1000 == 0: if self.args.multi: self.logger.info("The train loss of epoch{}-batch-{}:{};{}".format(self.current_epoch, batch_idx, cur_loss.item(), loss_2.item())) else: self.logger.info("The train loss of epoch{}-batch-{}:{}".format(self.current_epoch, batch_idx, cur_loss.item())) batch_idx += 1 self.current_iter += 1 if np.isnan(float(cur_loss.item())): raise ValueError('Loss is nan during training...') pred = pred.data.cpu().numpy() label = y.cpu().numpy() argpred = np.argmax(pred, axis=1) self.Eval.add_batch(label, argpred) if batch_idx==self.dataloader.num_iterations: break self.log_one_train_epoch(x, label, argpred, train_loss) tqdm_epoch.close() def log_one_train_epoch(self, x, label, argpred, train_loss): #show train image on tensorboard images_inv = inv_preprocess(x.clone().cpu(), self.args.show_num_images, numpy_transform=self.args.numpy_transform) labels_colors = decode_labels(label, self.args.show_num_images) preds_colors = decode_labels(argpred, self.args.show_num_images) for index, (img, lab, color_pred) in enumerate(zip(images_inv, labels_colors, preds_colors)): self.writer.add_image('train/'+ str(index)+'/Images', img, self.current_epoch) self.writer.add_image('train/'+ str(index)+'/Labels', lab, self.current_epoch) self.writer.add_image('train/'+ str(index)+'/preds', color_pred, self.current_epoch) if self.args.class_16: PA = self.Eval.Pixel_Accuracy() MPA_16, MPA = self.Eval.Mean_Pixel_Accuracy() MIoU_16, MIoU = self.Eval.Mean_Intersection_over_Union() FWIoU_16, FWIoU = self.Eval.Frequency_Weighted_Intersection_over_Union() else: PA = 
    def log_one_train_epoch(self, x, label, argpred, train_loss):
        # show train images on tensorboard
        images_inv = inv_preprocess(x.clone().cpu(), self.args.show_num_images,
                                    numpy_transform=self.args.numpy_transform)
        labels_colors = decode_labels(label, self.args.show_num_images)
        preds_colors = decode_labels(argpred, self.args.show_num_images)
        for index, (img, lab, color_pred) in enumerate(zip(images_inv, labels_colors, preds_colors)):
            self.writer.add_image('train/' + str(index) + '/Images', img, self.current_epoch)
            self.writer.add_image('train/' + str(index) + '/Labels', lab, self.current_epoch)
            self.writer.add_image('train/' + str(index) + '/preds', color_pred, self.current_epoch)

        if self.args.class_16:
            # the 16-class protocol returns both 16- and 13-class variants
            PA = self.Eval.Pixel_Accuracy()
            MPA_16, MPA = self.Eval.Mean_Pixel_Accuracy()
            MIoU_16, MIoU = self.Eval.Mean_Intersection_over_Union()
            FWIoU_16, FWIoU = self.Eval.Frequency_Weighted_Intersection_over_Union()
        else:
            PA = self.Eval.Pixel_Accuracy()
            MPA = self.Eval.Mean_Pixel_Accuracy()
            MIoU = self.Eval.Mean_Intersection_over_Union()
            FWIoU = self.Eval.Frequency_Weighted_Intersection_over_Union()

        self.logger.info('\nEpoch:{}, train PA1:{}, MPA1:{}, MIoU1:{}, FWIoU1:{}'.format(
            self.current_epoch, PA, MPA, MIoU, FWIoU))
        self.writer.add_scalar('train_PA', PA, self.current_epoch)
        self.writer.add_scalar('train_MPA', MPA, self.current_epoch)
        self.writer.add_scalar('train_MIoU', MIoU, self.current_epoch)
        self.writer.add_scalar('train_FWIoU', FWIoU, self.current_epoch)

        tr_loss = sum(train_loss) / len(train_loss) if isinstance(train_loss, list) else train_loss
        self.writer.add_scalar('train_loss', tr_loss, self.current_epoch)
        tqdm.write("The average loss of train epoch-{}-: {}".format(self.current_epoch, tr_loss))

    def validate(self, mode='val'):
        self.logger.info('\nvalidating one epoch...')
        self.Eval.reset()
        with torch.no_grad():
            tqdm_batch = tqdm(self.dataloader.val_loader, total=self.dataloader.valid_iterations,
                              desc="Val Epoch-{}-".format(self.current_epoch + 1))
            if mode == 'val':
                self.model.eval()

            for x, y, id in tqdm_batch:
                if self.cuda:
                    x, y = x.to(self.device), y.to(device=self.device, dtype=torch.long)

                # model forward; keep the auxiliary-head handling inside the
                # guard so pred_2 is only touched when the model returns a tuple
                pred = self.model(x)
                if isinstance(pred, tuple):
                    pred_2 = pred[1]
                    pred = pred[0]
                    pred_P = F.softmax(pred, dim=1)
                    pred_P_2 = F.softmax(pred_2, dim=1)
                y = torch.squeeze(y, 1)

                pred = pred.data.cpu().numpy()
                label = y.cpu().numpy()
                argpred = np.argmax(pred, axis=1)
                self.Eval.add_batch(label, argpred)

            # show val result (last batch) on tensorboard
            images_inv = inv_preprocess(x.clone().cpu(), self.args.show_num_images,
                                        numpy_transform=self.args.numpy_transform)
            labels_colors = decode_labels(label, self.args.show_num_images)
            preds_colors = decode_labels(argpred, self.args.show_num_images)
            for index, (img, lab, color_pred) in enumerate(zip(images_inv, labels_colors, preds_colors)):
                self.writer.add_image(str(index) + '/Images', img, self.current_epoch)
                self.writer.add_image(str(index) + '/Labels', lab, self.current_epoch)
                self.writer.add_image(str(index) + '/preds', color_pred, self.current_epoch)

            if self.args.class_16:
                def val_info(Eval, name):
                    PA = Eval.Pixel_Accuracy()
                    MPA_16, MPA_13 = Eval.Mean_Pixel_Accuracy()
                    MIoU_16, MIoU_13 = Eval.Mean_Intersection_over_Union()
                    FWIoU_16, FWIoU_13 = Eval.Frequency_Weighted_Intersection_over_Union()
                    PC_16, PC_13 = Eval.Mean_Precision()
                    print("########## Eval{} ############".format(name))
                    self.logger.info('\nEpoch:{:.3f}, {} PA:{:.3f}, MPA_16:{:.3f}, MIoU_16:{:.3f}, FWIoU_16:{:.3f}, PC_16:{:.3f}'.format(
                        self.current_epoch, name, PA, MPA_16, MIoU_16, FWIoU_16, PC_16))
                    self.logger.info('\nEpoch:{:.3f}, {} PA:{:.3f}, MPA_13:{:.3f}, MIoU_13:{:.3f}, FWIoU_13:{:.3f}, PC_13:{:.3f}'.format(
                        self.current_epoch, name, PA, MPA_13, MIoU_13, FWIoU_13, PC_13))
                    self.writer.add_scalar('PA' + name, PA, self.current_epoch)
                    self.writer.add_scalar('MPA_16' + name, MPA_16, self.current_epoch)
                    self.writer.add_scalar('MIoU_16' + name, MIoU_16, self.current_epoch)
                    self.writer.add_scalar('FWIoU_16' + name, FWIoU_16, self.current_epoch)
                    self.writer.add_scalar('MPA_13' + name, MPA_13, self.current_epoch)
                    self.writer.add_scalar('MIoU_13' + name, MIoU_13, self.current_epoch)
                    self.writer.add_scalar('FWIoU_13' + name, FWIoU_13, self.current_epoch)
                    return PA, MPA_13, MIoU_13, FWIoU_13
            else:
                def val_info(Eval, name):
                    PA = Eval.Pixel_Accuracy()
                    MPA = Eval.Mean_Pixel_Accuracy()
                    MIoU = Eval.Mean_Intersection_over_Union()
                    FWIoU = Eval.Frequency_Weighted_Intersection_over_Union()
                    PC = Eval.Mean_Precision()
                    print("########## Eval{} ############".format(name))
                    self.logger.info('\nEpoch:{:.3f}, {} PA1:{:.3f}, MPA1:{:.3f}, MIoU1:{:.3f}, FWIoU1:{:.3f}, PC:{:.3f}'.format(
                        self.current_epoch, name, PA, MPA, MIoU, FWIoU, PC))
                    self.writer.add_scalar('PA' + name, PA, self.current_epoch)
                    self.writer.add_scalar('MPA' + name, MPA, self.current_epoch)
                    self.writer.add_scalar('MIoU' + name, MIoU, self.current_epoch)
                    self.writer.add_scalar('FWIoU' + name, FWIoU, self.current_epoch)
                    return PA, MPA, MIoU, FWIoU

            PA, MPA, MIoU, FWIoU = val_info(self.Eval, "")
            tqdm_batch.close()

        return PA, MPA, MIoU, FWIoU
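    # validate_source mirrors validate() but evaluates on the source-domain
    # split; it reads from self.source_val_dataloader, which is not created in
    # __init__ here and is presumably attached by the cross-domain trainer
    # that builds on this class.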
############".format(name)) self.logger.info('\nEpoch:{:.3f}, {} PA1:{:.3f}, MPA1:{:.3f}, MIoU1:{:.3f}, FWIoU1:{:.3f}, PC:{:.3f}'.format(self.current_epoch, name, PA, MPA, MIoU, FWIoU, PC)) self.writer.add_scalar('PA'+name, PA, self.current_epoch) self.writer.add_scalar('MPA'+name, MPA, self.current_epoch) self.writer.add_scalar('MIoU'+name, MIoU, self.current_epoch) self.writer.add_scalar('FWIoU'+name, FWIoU, self.current_epoch) return PA, MPA, MIoU, FWIoU PA, MPA, MIoU, FWIoU = val_info(self.Eval, "") tqdm_batch.close() return PA, MPA, MIoU, FWIoU def validate_source(self): self.logger.info('\nvalidating source domain...') self.Eval.reset() with torch.no_grad(): tqdm_batch = tqdm(self.source_val_dataloader, total=self.dataloader.valid_iterations, desc="Source Val Epoch-{}-".format(self.current_epoch + 1)) self.model.eval() i = 0 for x, y, id in tqdm_batch: # y.to(torch.long) if self.cuda: x, y = x.to(self.device), y.to(device=self.device, dtype=torch.long) # model pred = self.model(x) if isinstance(pred, tuple): pred_2 = pred[1] pred = pred[0] pred_P = F.softmax(pred, dim=1) pred_P_2 = F.softmax(pred_2, dim=1) y = torch.squeeze(y, 1) pred = pred.data.cpu().numpy() label = y.cpu().numpy() argpred = np.argmax(pred, axis=1) self.Eval.add_batch(label, argpred) i += 1 if i == self.dataloader.valid_iterations: break #show val result on tensorboard images_inv = inv_preprocess(x.clone().cpu(), self.args.show_num_images, numpy_transform=self.args.numpy_transform) labels_colors = decode_labels(label, self.args.show_num_images) preds_colors = decode_labels(argpred, self.args.show_num_images) for index, (img, lab, color_pred) in enumerate(zip(images_inv, labels_colors, preds_colors)): self.writer.add_image('source_eval/'+str(index)+'/Images', img, self.current_epoch) self.writer.add_image('source_eval/'+str(index)+'/Labels', lab, self.current_epoch) self.writer.add_image('source_eval/'+str(index)+'/preds', color_pred, self.current_epoch) if self.args.class_16: def source_val_info(Eval, name): PA = Eval.Pixel_Accuracy() MPA_16, MPA_13 = Eval.Mean_Pixel_Accuracy() MIoU_16, MIoU_13 = Eval.Mean_Intersection_over_Union() FWIoU_16, FWIoU_13 = Eval.Frequency_Weighted_Intersection_over_Union() PC_16, PC_13 = Eval.Mean_Precision() print("########## Source Eval{} ############".format(name)) self.logger.info('\nEpoch:{:.3f}, source {} PA:{:.3f}, MPA_16:{:.3f}, MIoU_16:{:.3f}, FWIoU_16:{:.3f}, PC_16:{:.3f}'.format(self.current_epoch, name, PA, MPA_16, MIoU_16, FWIoU_16, PC_16)) self.logger.info('\nEpoch:{:.3f}, source {} PA:{:.3f}, MPA_13:{:.3f}, MIoU_13:{:.3f}, FWIoU_13:{:.3f}, PC_13:{:.3f}'.format(self.current_epoch, name, PA, MPA_13, MIoU_13, FWIoU_13, PC_13)) self.writer.add_scalar('source_PA'+name, PA, self.current_epoch) self.writer.add_scalar('source_MPA_16'+name, MPA_16, self.current_epoch) self.writer.add_scalar('source_MIoU_16'+name, MIoU_16, self.current_epoch) self.writer.add_scalar('source_FWIoU_16'+name, FWIoU_16, self.current_epoch) self.writer.add_scalar('source_MPA_13'+name, MPA_13, self.current_epoch) self.writer.add_scalar('source_MIoU_13'+name, MIoU_13, self.current_epoch) self.writer.add_scalar('source_FWIoU_13'+name, FWIoU_13, self.current_epoch) return PA, MPA_13, MIoU_13, FWIoU_13 else: def source_val_info(Eval, name): PA = Eval.Pixel_Accuracy() MPA = Eval.Mean_Pixel_Accuracy() MIoU = Eval.Mean_Intersection_over_Union() FWIoU = Eval.Frequency_Weighted_Intersection_over_Union() PC = Eval.Mean_Precision() self.writer.add_scalar('source_PA'+name, PA, self.current_epoch) 
    def save_checkpoint(self, filename=None):
        """
        Save a checkpoint (a new best is saved as '<train_id>best.pth').

        :param filename: checkpoint file name, relative to checkpoint_dir
        """
        filename = os.path.join(self.args.checkpoint_dir, filename)
        state = {
            'epoch': self.current_epoch + 1,
            'iteration': self.current_iter,
            'state_dict': self.model.state_dict(),
            'optimizer': self.optimizer.state_dict(),
            'best_MIou': self.best_MIou
        }
        torch.save(state, filename)

    def load_checkpoint(self, filename):
        try:
            self.logger.info("Loading checkpoint '{}'".format(filename))
            checkpoint = torch.load(filename)
            if 'state_dict' in checkpoint:
                self.model.load_state_dict(checkpoint['state_dict'])
            else:
                # a bare state_dict saved without the wrapping dict
                self.model.module.load_state_dict(checkpoint)
            self.logger.info("Checkpoint loaded successfully from " + filename)
        except OSError:
            self.logger.info("No checkpoint exists at '{}'. Skipping...".format(self.args.checkpoint_dir))
            self.logger.info("**First time to train**")

    def poly_lr_scheduler(self, optimizer, init_lr=None, iter=None, max_iter=None, power=None):
        # poly decay: new_lr = init_lr * (1 - iter / max_iter) ** power
        init_lr = self.args.lr if init_lr is None else init_lr
        iter = self.current_iter if iter is None else iter
        max_iter = self.args.iter_max if max_iter is None else max_iter
        power = self.args.poly_power if power is None else power
        new_lr = init_lr * (1 - float(iter) / max_iter) ** power
        optimizer.param_groups[0]["lr"] = new_lr
        if len(optimizer.param_groups) == 2:
            # a second param group (e.g., the classifier head) runs at 10x the base lr
            optimizer.param_groups[1]["lr"] = 10 * new_lr
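# ---------------------------------------------------------------------------
# Minimal sketch of the poly learning-rate decay applied by poly_lr_scheduler
# above, for intuition only. The default values below are made up for the
# demo and are not taken from any training configuration in this repo.
# ---------------------------------------------------------------------------
def _poly_lr_demo(init_lr=2.5e-4, max_iter=250000, power=0.9):
    """Print the decayed lr at a few points along the schedule."""
    for it in (0, max_iter // 4, max_iter // 2, max_iter - 1):
        new_lr = init_lr * (1 - float(it) / max_iter) ** power
        print("iter {:>7d} -> lr {:.6e}".format(it, new_lr))

# Hypothetical usage of Trainer (the real argument parser and logger setup
# live elsewhere in this repo; `parse_train_args` is a stand-in name, not an
# actual function here):
#
#     args = parse_train_args()
#     agent = Trainer(args=args, cuda=True, train_id="gta2city", logger=logging.getLogger())
#     agent.main()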