def __init__(self, args):
    """Build the deploy config, preprocessing pipeline, flow state and predictor.

    Args:
        args: Parsed options object. This method reads ``args.cfg``,
            ``args.input_shape`` (unpacked as ``(height, width)``),
            ``args.use_gpu`` and ``args.test_speed``.
    """
    self.cfg = DeployConfig(args.cfg)
    self.args = args
    self.compose = T.Compose(self.cfg.transforms)

    # Optical-flow tracker plus zero-filled "previous frame" buffers sized
    # to the network input resolution.
    height, width = args.input_shape
    frame_shape = (height, width)
    self.disflow = cv2.DISOpticalFlow_create(
        cv2.DISOPTICAL_FLOW_PRESET_ULTRAFAST)
    self.prev_gray = np.zeros(frame_shape, np.uint8)
    self.prev_cfd = np.zeros(frame_shape, np.float32)
    self.is_init = True

    # Inference engine configuration.
    predictor_config = PredictConfig(self.cfg.model, self.cfg.params)
    predictor_config.disable_glog_info()
    if args.use_gpu:
        # NOTE(review): arguments are presumably (initial memory pool in MB,
        # device id) -- confirm against the installed Paddle Inference API.
        predictor_config.enable_use_gpu(100, 0)
    self.predictor = create_predictor(predictor_config)

    # The timing helper only exists when speed testing was requested.
    if args.test_speed:
        self.cost_averager = TimeAverager()
def train(model,
          train_dataset,
          val_dataset=None,
          optimizer=None,
          save_dir='output',
          iters=10000,
          batch_size=2,
          resume_model=None,
          save_interval=1000,
          log_iters=10,
          num_workers=0,
          use_vdl=False,
          losses=None,
          keep_checkpoint_max=5,
          eval_begin_iters=None):
    """
    Run the training loop of a matting model.

    Args:
        model (nn.Layer): A matting model. It is called with a data batch and
            must provide ``loss(logit_dict, data, losses)`` returning a dict
            that contains the key 'all'.
        train_dataset (paddle.io.Dataset): Used to read and process training datasets.
        val_dataset (paddle.io.Dataset, optional): Used to read and process validation datasets.
        optimizer (paddle.optimizer.Optimizer): The optimizer.
        save_dir (str, optional): The directory for saving the model snapshot. Default: 'output'.
        iters (int, optional): How many iters to train the model. Default: 10000.
        batch_size (int, optional): Mini batch size of one gpu or cpu. Default: 2.
        resume_model (str, optional): The path of resume model.
        save_interval (int, optional): How many iters to save a model snapshot once during training. Default: 1000.
        log_iters (int, optional): Display logging information at every log_iters. Default: 10.
        num_workers (int, optional): Num workers for data loader. Default: 0.
        use_vdl (bool, optional): Whether to record the data to VisualDL during training. Default: False.
        losses (dict, optional): A dict of loss, forwarded to ``model.loss``. Default: None.
        keep_checkpoint_max (int, optional): Maximum number of checkpoints to save. Default: 5.
        eval_begin_iters (int, optional): The iter at which evaluation begins.
            ``iters // 2`` is used when it is None. Default: None.
    """
    model.train()
    parallel_env = paddle.distributed.ParallelEnv()
    nranks = parallel_env.nranks
    local_rank = parallel_env.local_rank
    is_master = local_rank == 0

    start_iter = 0
    if resume_model is not None:
        start_iter = resume(model, optimizer, resume_model)

    if not os.path.isdir(save_dir):
        # A non-directory entry occupying the path is removed first.
        if os.path.exists(save_dir):
            os.remove(save_dir)
        os.makedirs(save_dir)

    if nranks > 1:
        # Initialize parallel environment if not done.
        if not paddle.distributed.parallel.parallel_helper._is_parallel_ctx_initialized():
            paddle.distributed.init_parallel_env()
        ddp_model = paddle.DataParallel(model)

    batch_sampler = paddle.io.DistributedBatchSampler(
        train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)
    loader = paddle.io.DataLoader(
        train_dataset,
        batch_sampler=batch_sampler,
        num_workers=num_workers,
        return_list=True,
    )

    if use_vdl:
        from visualdl import LogWriter
        log_writer = LogWriter(save_dir)

    if eval_begin_iters is None:
        eval_begin_iters = iters // 2

    avg_loss = defaultdict(float)
    iters_per_epoch = len(batch_sampler)
    best_sad = np.inf
    best_model_iter = -1
    reader_cost_averager = TimeAverager()
    batch_cost_averager = TimeAverager()
    save_models = deque()
    batch_start = time.time()

    cur_iter = start_iter
    while cur_iter < iters:
        for data in loader:
            cur_iter += 1
            if cur_iter > iters:
                break
            reader_cost_averager.record(time.time() - batch_start)

            # Forward through the parallel wrapper when distributed; the loss
            # is always computed by the bare model.
            network = ddp_model if nranks > 1 else model
            logit_dict = network(data)
            loss_dict = model.loss(logit_dict, data, losses)
            loss_dict['all'].backward()

            optimizer.step()
            lr = optimizer.get_lr()
            lr_scheduler = optimizer._learning_rate
            if isinstance(lr_scheduler, paddle.optimizer.lr.LRScheduler):
                lr_scheduler.step()
            model.clear_gradients()

            for name, tensor in loss_dict.items():
                avg_loss[name] += tensor.numpy()[0]
            batch_cost_averager.record(
                time.time() - batch_start, num_samples=batch_size)

            if cur_iter % log_iters == 0 and is_master:
                # Turn the accumulated sums into per-iteration means.
                for name in avg_loss:
                    avg_loss[name] = avg_loss[name] / log_iters
                remain_iters = iters - cur_iter
                avg_train_batch_cost = batch_cost_averager.get_average()
                avg_train_reader_cost = reader_cost_averager.get_average()
                eta = calculate_eta(remain_iters, avg_train_batch_cost)
                epoch = (cur_iter - 1) // iters_per_epoch + 1
                logger.info(
                    "[TRAIN] epoch={}, iter={}/{}, loss={:.4f}, lr={:.6f}, batch_cost={:.4f}, reader_cost={:.5f}, ips={:.4f} samples/sec | ETA {}"
                    .format(epoch, cur_iter, iters, avg_loss['all'], lr,
                            avg_train_batch_cost, avg_train_reader_cost,
                            batch_cost_averager.get_ips_average(), eta))

                # print loss: 'all' first, then the remaining terms
                loss_parts = [
                    '[TRAIN] [LOSS] ', 'all={:.4f}'.format(avg_loss['all'])
                ]
                for name, value in avg_loss.items():
                    if name != 'all':
                        loss_parts.append(' ' + name + '={:.4f}'.format(value))
                logger.info(''.join(loss_parts))

                if use_vdl:
                    for name, value in avg_loss.items():
                        log_writer.add_scalar('Train/' + name, value, cur_iter)
                    log_writer.add_scalar('Train/lr', lr, cur_iter)
                    log_writer.add_scalar('Train/batch_cost',
                                          avg_train_batch_cost, cur_iter)
                    log_writer.add_scalar('Train/reader_cost',
                                          avg_train_reader_cost, cur_iter)

                for name in avg_loss.keys():
                    avg_loss[name] = 0.
                reader_cost_averager.reset()
                batch_cost_averager.reset()

            is_save_step = cur_iter % save_interval == 0 or cur_iter == iters

            # save model
            if is_save_step and is_master:
                current_save_dir = os.path.join(save_dir,
                                                "iter_{}".format(cur_iter))
                if not os.path.isdir(current_save_dir):
                    os.makedirs(current_save_dir)
                paddle.save(model.state_dict(),
                            os.path.join(current_save_dir, 'model.pdparams'))
                paddle.save(optimizer.state_dict(),
                            os.path.join(current_save_dir, 'model.pdopt'))
                save_models.append(current_save_dir)
                # Drop the oldest snapshot once the retention limit is exceeded.
                if len(save_models) > keep_checkpoint_max > 0:
                    shutil.rmtree(save_models.popleft())

            # eval model, save best model and add evaluation results to vdl
            if (is_save_step and is_master and val_dataset is not None
                    and cur_iter >= eval_begin_iters):
                # NOTE(review): this value is not forwarded; the evaluate
                # call below passes a literal 0 workers.
                num_workers = 1 if num_workers > 0 else 0
                sad, mse = evaluate(
                    model,
                    val_dataset,
                    num_workers=0,
                    print_detail=True,
                    save_results=False)
                model.train()

                # Lower SAD is better.
                if sad < best_sad:
                    best_sad = sad
                    best_model_iter = cur_iter
                    best_model_dir = os.path.join(save_dir, "best_model")
                    paddle.save(model.state_dict(),
                                os.path.join(best_model_dir, 'model.pdparams'))
                logger.info(
                    '[EVAL] The model with the best validation sad ({:.4f}) was saved at iter {}.'
                    .format(best_sad, best_model_iter))

                if use_vdl:
                    log_writer.add_scalar('Evaluate/SAD', sad, cur_iter)
                    log_writer.add_scalar('Evaluate/MSE', mse, cur_iter)

            batch_start = time.time()

    # Sleep for half a second to let dataloader release resources.
    time.sleep(0.5)
    if use_vdl:
        log_writer.close()
def predict(model,
            model_path,
            transforms,
            image_list,
            image_dir=None,
            trimap_list=None,
            save_dir='output'):
    """
    Predict alpha mattes for ``image_list`` and save them under ``save_dir``.

    Args:
        model (nn.Layer): Used to predict for input image.
        model_path (str): The path of pretrained model.
        transforms (transforms.Compose): Preprocess for input image.
        image_list (list): A list of image path to be predicted.
        image_dir (str, optional): The root directory of the images predicted. Default: None.
        trimap_list (list, optional): A list of trimap of image_list. Default: None.
        save_dir (str, optional): The directory to save the visualized results. Default: 'output'.

    Returns:
        The uint8 alpha array of the last image processed by this rank, or
        None when this rank had no image to process.
    """
    utils.utils.load_entire_model(model, model_path)
    model.eval()
    nranks = paddle.distributed.get_world_size()
    local_rank = paddle.distributed.get_rank()

    # Split the work across ranks; a single process handles everything.
    if nranks > 1:
        img_lists = partition_list(image_list, nranks)
        trimap_lists = (partition_list(trimap_list, nranks)
                        if trimap_list is not None else None)
    else:
        img_lists = [image_list]
        trimap_lists = [trimap_list] if trimap_list is not None else None

    logger.info("Start to predict...")
    progbar_pred = progbar.Progbar(target=len(img_lists[0]), verbose=1)
    preprocess_cost_averager = TimeAverager()
    infer_cost_averager = TimeAverager()
    postprocess_cost_averager = TimeAverager()

    # Defined up front so an empty image list returns None instead of
    # raising UnboundLocalError at the final return.
    alpha_pred = None
    with paddle.no_grad():
        for i, im_path in enumerate(img_lists[local_rank]):
            preprocess_start = time.time()
            trimap = (trimap_lists[local_rank][i]
                      if trimap_list is not None else None)
            data = preprocess(
                img=im_path, transforms=transforms, trimap=trimap)
            preprocess_cost_averager.record(time.time() - preprocess_start)

            infer_start = time.time()
            alpha_pred = model(data)
            infer_cost_averager.record(time.time() - infer_start)

            postprocess_start = time.time()
            alpha_pred = reverse_transform(alpha_pred, data['trans_info'])
            alpha_pred = (alpha_pred.numpy()).squeeze()
            alpha_pred = (alpha_pred * 255).astype('uint8')

            # get the saved name: path relative to image_dir when given,
            # otherwise just the file name
            if image_dir is not None:
                im_file = im_path.replace(image_dir, '')
            else:
                im_file = os.path.basename(im_path)
            # Strip one leading separator so os.path.join does not treat the
            # name as absolute; startswith is also safe on an empty name.
            if im_file.startswith(('/', '\\')):
                im_file = im_file[1:]

            save_path = os.path.join(save_dir, im_file)
            mkdir(save_path)
            save_alpha_pred(alpha_pred, save_path, trimap=trimap)
            postprocess_cost_averager.record(time.time() - postprocess_start)

            preprocess_cost = preprocess_cost_averager.get_average()
            infer_cost = infer_cost_averager.get_average()
            postprocess_cost = postprocess_cost_averager.get_average()
            if local_rank == 0:
                progbar_pred.update(i + 1,
                                    [('preprocess_cost', preprocess_cost),
                                     ('infer_cost cost', infer_cost),
                                     ('postprocess_cost', postprocess_cost)])

            preprocess_cost_averager.reset()
            infer_cost_averager.reset()
            postprocess_cost_averager.reset()
    return alpha_pred
def evaluate(model,
             eval_dataset,
             aug_eval=False,
             scales=1.0,
             flip_horizontal=True,
             flip_vertical=False,
             is_slide=False,
             stride=None,
             crop_size=None,
             num_workers=0,
             print_detail=True):
    """
    Launch evaluation.

    Args:
        model (nn.Layer): A semantic segmentation model.
        eval_dataset (paddle.io.Dataset): Used to read and process validation datasets.
        aug_eval (bool, optional): Whether to use multi-scale and flip augmentation for evaluation. Default: False.
        scales (list|float, optional): Scales for augment. It is valid when `aug_eval` is True. Default: 1.0.
        flip_horizontal (bool, optional): Whether to use flip horizontally augment. It is valid when `aug_eval` is True. Default: True.
        flip_vertical (bool, optional): Whether to use flip vertically augment. It is valid when `aug_eval` is True. Default: False.
        is_slide (bool, optional): Whether to evaluate by sliding window. Default: False.
        stride (tuple|list, optional): The stride of sliding window, the first is width and the second is height.
            It should be provided when `is_slide` is True.
        crop_size (tuple|list, optional): The crop size of sliding window, the first is width and the second is height.
            It should be provided when `is_slide` is True.
        num_workers (int, optional): Num workers for data loader. Default: 0.
        print_detail (bool, optional): Whether to print detailed information about the evaluation process. Default: True.

    Returns:
        float: The mIoU of validation datasets.
        float: The accuracy of validation datasets.
    """
    model.eval()
    parallel_env = paddle.distributed.ParallelEnv()
    nranks = parallel_env.nranks
    local_rank = parallel_env.local_rank
    if nranks > 1:
        # Initialize parallel environment if not done.
        if not paddle.distributed.parallel.parallel_helper._is_parallel_ctx_initialized():
            paddle.distributed.init_parallel_env()

    batch_sampler = paddle.io.DistributedBatchSampler(
        eval_dataset, batch_size=1, shuffle=False, drop_last=False)
    loader = paddle.io.DataLoader(
        eval_dataset,
        batch_sampler=batch_sampler,
        num_workers=num_workers,
        return_list=True,
    )

    total_iters = len(loader)
    num_samples = len(eval_dataset)
    intersect_area_all = 0
    pred_area_all = 0
    label_area_all = 0

    if print_detail:
        logger.info(
            "Start evaluating (total_samples={}, total_iters={})...".format(
                num_samples, total_iters))
    progbar_val = progbar.Progbar(target=total_iters, verbose=1)
    reader_cost_averager = TimeAverager()
    batch_cost_averager = TimeAverager()
    batch_start = time.time()

    with paddle.no_grad():
        for step, (im, label) in enumerate(loader):
            reader_cost_averager.record(time.time() - batch_start)
            label = label.astype('int64')

            # Arguments shared by the plain and the augmented inference paths.
            shared_kwargs = dict(
                ori_shape=label.shape[-2:],
                transforms=eval_dataset.transforms.transforms,
                is_slide=is_slide,
                stride=stride,
                crop_size=crop_size)
            if aug_eval:
                pred = infer.aug_inference(
                    model,
                    im,
                    scales=scales,
                    flip_horizontal=flip_horizontal,
                    flip_vertical=flip_vertical,
                    **shared_kwargs)
            else:
                pred = infer.inference(model, im, **shared_kwargs)

            intersect_area, pred_area, label_area = metrics.calculate_area(
                pred,
                label,
                eval_dataset.num_classes,
                ignore_index=eval_dataset.ignore_index)

            if nranks > 1:
                # Gather from all ranks
                intersect_area_list = []
                pred_area_list = []
                label_area_list = []
                paddle.distributed.all_gather(intersect_area_list,
                                              intersect_area)
                paddle.distributed.all_gather(pred_area_list, pred_area)
                paddle.distributed.all_gather(label_area_list, label_area)

                # Some image has been evaluated and should be eliminated in last iter
                if (step + 1) * nranks > num_samples:
                    valid = num_samples - step * nranks
                    intersect_area_list = intersect_area_list[:valid]
                    pred_area_list = pred_area_list[:valid]
                    label_area_list = label_area_list[:valid]

                for rank_intersect, rank_pred, rank_label in zip(
                        intersect_area_list, pred_area_list, label_area_list):
                    intersect_area_all = intersect_area_all + rank_intersect
                    pred_area_all = pred_area_all + rank_pred
                    label_area_all = label_area_all + rank_label
            else:
                intersect_area_all = intersect_area_all + intersect_area
                pred_area_all = pred_area_all + pred_area
                label_area_all = label_area_all + label_area

            batch_cost_averager.record(
                time.time() - batch_start, num_samples=len(label))
            batch_cost = batch_cost_averager.get_average()
            reader_cost = reader_cost_averager.get_average()

            if print_detail and local_rank == 0:
                progbar_val.update(step + 1, [('batch_cost', batch_cost),
                                              ('reader cost', reader_cost)])
            reader_cost_averager.reset()
            batch_cost_averager.reset()
            batch_start = time.time()

    class_iou, miou = metrics.mean_iou(intersect_area_all, pred_area_all,
                                       label_area_all)
    class_acc, acc = metrics.accuracy(intersect_area_all, pred_area_all)
    kappa = metrics.kappa(intersect_area_all, pred_area_all, label_area_all)

    if print_detail:
        logger.info(
            "[EVAL] #Images={} mIoU={:.4f} Acc={:.4f} Kappa={:.4f} ".format(
                len(eval_dataset), miou, acc, kappa))
        logger.info("[EVAL] Class IoU: \n" + str(np.round(class_iou, 4)))
        logger.info("[EVAL] Class Acc: \n" + str(np.round(class_acc, 4)))
    return miou, acc
def train(model,
          train_dataset,
          val_dataset=None,
          optimizer=None,
          save_dir='output',
          iters=10000,
          batch_size=2,
          resume_model=None,
          save_interval=1000,
          log_iters=10,
          num_workers=0,
          use_vdl=False,
          losses=None,
          keep_checkpoint_max=5,
          test_config=None,
          fp16=False,
          profiler_options=None):
    """
    Launch training.

    Args:
        model (nn.Layer): A semantic segmentation model.
        train_dataset (paddle.io.Dataset): Used to read and process training datasets.
        val_dataset (paddle.io.Dataset, optional): Used to read and process validation datasets.
        optimizer (paddle.optimizer.Optimizer): The optimizer.
        save_dir (str, optional): The directory for saving the model snapshot. Default: 'output'.
        iters (int, optional): How many iters to train the model. Default: 10000.
        batch_size (int, optional): Mini batch size of one gpu or cpu. Default: 2.
        resume_model (str, optional): The path of resume model.
        save_interval (int, optional): How many iters to save a model snapshot once during training. Default: 1000.
        log_iters (int, optional): Display logging information at every log_iters. Default: 10.
        num_workers (int, optional): Num workers for data loader. Default: 0.
        use_vdl (bool, optional): Whether to record the data to VisualDL during training. Default: False.
        losses (dict, optional): A dict including 'types' and 'coef'. The length of coef should equal to 1 or len(losses['types']).
            The 'types' item is a list of object of paddleseg.models.losses while the 'coef' item is a list of the relevant coefficient.
        keep_checkpoint_max (int, optional): Maximum number of checkpoints to save. Default: 5.
        test_config (dict, optional): Evaluation config, forwarded to `evaluate` as keyword arguments.
        fp16 (bool, optional): Whether to use amp.
        profiler_options (str, optional): The option of train profiler.
    """
    model.train()
    nranks = paddle.distributed.ParallelEnv().nranks
    local_rank = paddle.distributed.ParallelEnv().local_rank

    start_iter = 0
    if resume_model is not None:
        start_iter = resume(model, optimizer, resume_model)

    if not os.path.isdir(save_dir):
        # A non-directory entry occupying the path is removed first.
        if os.path.exists(save_dir):
            os.remove(save_dir)
        os.makedirs(save_dir)

    if nranks > 1:
        paddle.distributed.fleet.init(is_collective=True)
        optimizer = paddle.distributed.fleet.distributed_optimizer(
            optimizer)  # The return is Fleet object
        ddp_model = paddle.distributed.fleet.distributed_model(model)

    batch_sampler = paddle.io.DistributedBatchSampler(
        train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)
    loader = paddle.io.DataLoader(
        train_dataset,
        batch_sampler=batch_sampler,
        num_workers=num_workers,
        return_list=True,
        worker_init_fn=worker_init_fn,
    )

    # use amp
    if fp16:
        logger.info('use amp to train')
        scaler = paddle.amp.GradScaler(init_loss_scaling=1024)

    if use_vdl:
        from visualdl import LogWriter
        log_writer = LogWriter(save_dir)

    avg_loss = 0.0
    avg_loss_list = []
    iters_per_epoch = len(batch_sampler)
    best_acc = -1.0
    best_model_iter = -1
    reader_cost_averager = TimeAverager()
    batch_cost_averager = TimeAverager()
    save_models = deque()
    batch_start = time.time()

    # Stays None when no batch is processed (e.g. resuming at or beyond
    # `iters`), so the FLOPs section below can be skipped instead of failing
    # on an unbound name.
    images = None

    cur_iter = start_iter
    while cur_iter < iters:
        for data in loader:
            cur_iter += 1
            if cur_iter > iters:
                # Version-specific handling kept from the original: on
                # Paddle 2.1.2 the loader is iterated to the end instead of
                # breaking out of it.
                version = paddle.__version__
                if version == '2.1.2':
                    continue
                else:
                    break
            reader_cost_averager.record(time.time() - batch_start)

            images = data[0]
            labels = data[1].astype('int64')
            edges = None
            if len(data) == 3:
                edges = data[2].astype('int64')
            if hasattr(model, 'data_format') and model.data_format == 'NHWC':
                images = images.transpose((0, 2, 3, 1))

            if fp16:
                with paddle.amp.auto_cast(
                        enable=True,
                        custom_white_list={
                            "elementwise_add", "batch_norm", "sync_batch_norm"
                        },
                        custom_black_list={'bilinear_interp_v2'}):
                    if nranks > 1:
                        logits_list = ddp_model(images)
                    else:
                        logits_list = model(images)
                    loss_list = loss_computation(
                        logits_list=logits_list,
                        labels=labels,
                        losses=losses,
                        edges=edges)
                    loss = sum(loss_list)

                scaled = scaler.scale(loss)  # scale the loss
                scaled.backward()  # do backward
                # The scaler is given the wrapped optimizer when `optimizer`
                # is a Fleet object.
                if isinstance(optimizer, paddle.distributed.fleet.Fleet):
                    scaler.minimize(optimizer.user_defined_optimizer, scaled)
                else:
                    scaler.minimize(optimizer, scaled)  # update parameters
            else:
                if nranks > 1:
                    logits_list = ddp_model(images)
                else:
                    logits_list = model(images)
                loss_list = loss_computation(
                    logits_list=logits_list,
                    labels=labels,
                    losses=losses,
                    edges=edges)
                loss = sum(loss_list)
                loss.backward()
                optimizer.step()

            lr = optimizer.get_lr()

            # update lr
            if isinstance(optimizer, paddle.distributed.fleet.Fleet):
                lr_sche = optimizer.user_defined_optimizer._learning_rate
            else:
                lr_sche = optimizer._learning_rate
            if isinstance(lr_sche, paddle.optimizer.lr.LRScheduler):
                lr_sche.step()

            train_profiler.add_profiler_step(profiler_options)

            model.clear_gradients()
            avg_loss += loss.numpy()[0]
            if not avg_loss_list:
                avg_loss_list = [l.numpy() for l in loss_list]
            else:
                for i, loss_item in enumerate(loss_list):
                    avg_loss_list[i] += loss_item.numpy()
            batch_cost_averager.record(
                time.time() - batch_start, num_samples=batch_size)

            if cur_iter % log_iters == 0 and local_rank == 0:
                avg_loss /= log_iters
                avg_loss_list = [l[0] / log_iters for l in avg_loss_list]
                remain_iters = iters - cur_iter
                avg_train_batch_cost = batch_cost_averager.get_average()
                avg_train_reader_cost = reader_cost_averager.get_average()
                eta = calculate_eta(remain_iters, avg_train_batch_cost)
                logger.info(
                    "[TRAIN] epoch: {}, iter: {}/{}, loss: {:.4f}, lr: {:.6f}, batch_cost: {:.4f}, reader_cost: {:.5f}, ips: {:.4f} samples/sec | ETA {}"
                    .format((cur_iter - 1) // iters_per_epoch + 1, cur_iter,
                            iters, avg_loss, lr, avg_train_batch_cost,
                            avg_train_reader_cost,
                            batch_cost_averager.get_ips_average(), eta))
                if use_vdl:
                    log_writer.add_scalar('Train/loss', avg_loss, cur_iter)
                    # Record each loss term when there is more than one.
                    if len(avg_loss_list) > 1:
                        for i, value in enumerate(avg_loss_list):
                            log_writer.add_scalar('Train/loss_' + str(i),
                                                  value, cur_iter)

                    log_writer.add_scalar('Train/lr', lr, cur_iter)
                    log_writer.add_scalar('Train/batch_cost',
                                          avg_train_batch_cost, cur_iter)
                    log_writer.add_scalar('Train/reader_cost',
                                          avg_train_reader_cost, cur_iter)
                avg_loss = 0.0
                avg_loss_list = []
                reader_cost_averager.reset()
                batch_cost_averager.reset()

            is_save_step = cur_iter % save_interval == 0 or cur_iter == iters

            # Evaluation is not restricted to rank 0 here.
            if is_save_step and val_dataset is not None:
                num_workers = 1 if num_workers > 0 else 0

                if test_config is None:
                    test_config = {}

                acc, fp, fn = evaluate(
                    model,
                    val_dataset,
                    num_workers=num_workers,
                    save_dir=save_dir,
                    **test_config)

                model.train()

            if is_save_step and local_rank == 0:
                current_save_dir = os.path.join(save_dir,
                                                "iter_{}".format(cur_iter))
                if not os.path.isdir(current_save_dir):
                    os.makedirs(current_save_dir)
                paddle.save(model.state_dict(),
                            os.path.join(current_save_dir, 'model.pdparams'))
                paddle.save(optimizer.state_dict(),
                            os.path.join(current_save_dir, 'model.pdopt'))
                save_models.append(current_save_dir)
                # Drop the oldest snapshot once the retention limit is exceeded.
                if len(save_models) > keep_checkpoint_max > 0:
                    model_to_remove = save_models.popleft()
                    shutil.rmtree(model_to_remove)

                if val_dataset is not None:
                    if acc > best_acc:
                        best_acc = acc
                        best_model_iter = cur_iter
                        best_model_dir = os.path.join(save_dir, "best_model")
                        paddle.save(
                            model.state_dict(),
                            os.path.join(best_model_dir, 'model.pdparams'))
                    logger.info(
                        '[EVAL] The model with the best validation Acc ({:.4f}) was saved at iter {}.'
                        .format(best_acc, best_model_iter))

                    if use_vdl:
                        log_writer.add_scalar('Evaluate/Acc', acc, cur_iter)
                        log_writer.add_scalar('Evaluate/Fp', fp, cur_iter)
                        log_writer.add_scalar('Evaluate/Fn', fn, cur_iter)
            batch_start = time.time()

    # Calculate flops, using the shape of the last processed batch.
    if local_rank == 0 and images is not None:
        _, c, h, w = images.shape
        _ = paddle.flops(
            model, [1, c, h, w],
            custom_ops={paddle.nn.SyncBatchNorm: op_flops_funs.count_syncbn})

    # Sleep for half a second to let dataloader release resources.
    time.sleep(0.5)
    if use_vdl:
        log_writer.close()
def train(model,
          train_dataset,
          val_dataset=None,
          optimizer=None,
          save_dir='output',
          iters=10000,
          batch_size=2,
          resume_model=None,
          save_interval=1000,
          log_iters=10,
          num_workers=0,
          use_vdl=False,
          losses=None,
          keep_checkpoint_max=5,
          threshold=0.1,
          nms_kernel=7,
          top_k=200):
    """
    Launch training.

    Args:
        model (nn.Layer): A semantic segmentation model.
        train_dataset (paddle.io.Dataset): Used to read and process training datasets.
        val_dataset (paddle.io.Dataset, optional): Used to read and process validation datasets.
        optimizer (paddle.optimizer.Optimizer): The optimizer.
        save_dir (str, optional): The directory for saving the model snapshot. Default: 'output'.
        iters (int, optional): How many iters to train the model. Default: 10000.
        batch_size (int, optional): Mini batch size of one gpu or cpu. Default: 2.
        resume_model (str, optional): The path of resume model.
        save_interval (int, optional): How many iters to save a model snapshot once during training. Default: 1000.
        log_iters (int, optional): Display logging information at every log_iters. Default: 10.
        num_workers (int, optional): Num workers for data loader. Default: 0.
        use_vdl (bool, optional): Whether to record the data to VisualDL during training. Default: False.
        losses (dict): A dict including 'types' and 'coef'. The length of coef should equal to 1 or len(losses['types']).
            The 'types' item is a list of object of paddleseg.models.losses while the 'coef' item is a list of the relevant coefficient.
        keep_checkpoint_max (int, optional): Maximum number of checkpoints to save. Default: 5.
        threshold (float, optional): A Float, threshold applied to center heatmap score. Default: 0.1.
        nms_kernel (int, optional): An Integer, NMS max pooling kernel size. Default: 7.
        top_k (int, optional): An Integer, top k centers to keep. Default: 200.
    """
    model.train()
    nranks = paddle.distributed.ParallelEnv().nranks
    local_rank = paddle.distributed.ParallelEnv().local_rank

    start_iter = 0
    if resume_model is not None:
        start_iter = resume(model, optimizer, resume_model)

    if not os.path.isdir(save_dir):
        # A non-directory entry occupying the path is removed first.
        if os.path.exists(save_dir):
            os.remove(save_dir)
        os.makedirs(save_dir)

    if nranks > 1:
        # Initialize parallel environment if not done.
        if not paddle.distributed.parallel.parallel_helper._is_parallel_ctx_initialized():
            paddle.distributed.init_parallel_env()
        ddp_model = paddle.DataParallel(model)

    batch_sampler = paddle.io.DistributedBatchSampler(
        train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)
    loader = paddle.io.DataLoader(
        train_dataset,
        batch_sampler=batch_sampler,
        num_workers=num_workers,
        return_list=True,
    )

    if use_vdl:
        from visualdl import LogWriter
        log_writer = LogWriter(save_dir)

    avg_loss = 0.0
    avg_loss_list = []
    iters_per_epoch = len(batch_sampler)
    best_pq = -1.0
    best_model_iter = -1
    reader_cost_averager = TimeAverager()
    batch_cost_averager = TimeAverager()
    save_models = deque()
    batch_start = time.time()

    # Stays None when no batch is processed (e.g. resuming at or beyond
    # `iters`), so the FLOPs section below can be skipped instead of failing
    # on an unbound name.
    images = None

    cur_iter = start_iter
    while cur_iter < iters:
        for data in loader:
            cur_iter += 1
            if cur_iter > iters:
                break
            reader_cost_averager.record(time.time() - batch_start)

            images = data[0]
            semantic = data[1]
            semantic_weights = data[2]
            center = data[3]
            center_weights = data[4]
            offset = data[5]
            offset_weights = data[6]
            foreground = data[7]  # read from the batch but not used below

            if nranks > 1:
                logits_list = ddp_model(images)
            else:
                logits_list = model(images)
            loss_list = loss_computation(
                logits_list=logits_list,
                losses=losses,
                semantic=semantic,
                semantic_weights=semantic_weights,
                center=center,
                center_weights=center_weights,
                offset=offset,
                offset_weights=offset_weights)
            loss = sum(loss_list)
            loss.backward()

            optimizer.step()
            lr = optimizer.get_lr()
            if isinstance(optimizer._learning_rate,
                          paddle.optimizer.lr.LRScheduler):
                optimizer._learning_rate.step()
            model.clear_gradients()

            avg_loss += loss.numpy()[0]
            if not avg_loss_list:
                avg_loss_list = [l.numpy() for l in loss_list]
            else:
                for i, loss_item in enumerate(loss_list):
                    avg_loss_list[i] += loss_item.numpy()
            batch_cost_averager.record(
                time.time() - batch_start, num_samples=batch_size)

            if cur_iter % log_iters == 0 and local_rank == 0:
                avg_loss /= log_iters
                avg_loss_list = [l[0] / log_iters for l in avg_loss_list]
                remain_iters = iters - cur_iter
                avg_train_batch_cost = batch_cost_averager.get_average()
                avg_train_reader_cost = reader_cost_averager.get_average()
                eta = calculate_eta(remain_iters, avg_train_batch_cost)
                logger.info(
                    "[TRAIN] epoch={}, iter={}/{}, loss={:.4f}, lr={:.6f}, batch_cost={:.4f}, reader_cost={:.5f}, ips={:.4f} samples/sec | ETA {}"
                    .format((cur_iter - 1) // iters_per_epoch + 1, cur_iter,
                            iters, avg_loss, lr, avg_train_batch_cost,
                            avg_train_reader_cost,
                            batch_cost_averager.get_ips_average(), eta))
                logger.info(
                    "[LOSS] loss={:.4f}, semantic_loss={:.4f}, center_loss={:.4f}, offset_loss={:.4f}"
                    .format(avg_loss, avg_loss_list[0], avg_loss_list[1],
                            avg_loss_list[2]))
                if use_vdl:
                    log_writer.add_scalar('Train/loss', avg_loss, cur_iter)
                    # Record each loss term when there is more than one.
                    if len(avg_loss_list) > 1:
                        for i, value in enumerate(avg_loss_list):
                            log_writer.add_scalar('Train/loss_' + str(i),
                                                  value, cur_iter)

                    log_writer.add_scalar('Train/lr', lr, cur_iter)
                    log_writer.add_scalar('Train/batch_cost',
                                          avg_train_batch_cost, cur_iter)
                    log_writer.add_scalar('Train/reader_cost',
                                          avg_train_reader_cost, cur_iter)

                avg_loss = 0.0
                avg_loss_list = []
                reader_cost_averager.reset()
                batch_cost_averager.reset()

            is_save_step = cur_iter % save_interval == 0 or cur_iter == iters

            # save model
            if is_save_step and local_rank == 0:
                current_save_dir = os.path.join(save_dir,
                                                "iter_{}".format(cur_iter))
                if not os.path.isdir(current_save_dir):
                    os.makedirs(current_save_dir)
                paddle.save(model.state_dict(),
                            os.path.join(current_save_dir, 'model.pdparams'))
                paddle.save(optimizer.state_dict(),
                            os.path.join(current_save_dir, 'model.pdopt'))
                save_models.append(current_save_dir)
                # Drop the oldest snapshot once the retention limit is exceeded.
                if len(save_models) > keep_checkpoint_max > 0:
                    model_to_remove = save_models.popleft()
                    shutil.rmtree(model_to_remove)

            # eval model, save best model and add evaluate results to vdl;
            # only in the second half of training, on rank 0
            if (is_save_step and val_dataset is not None and local_rank == 0
                    and cur_iter > iters // 2):
                num_workers = 1 if num_workers > 0 else 0
                panoptic_results, semantic_results, instance_results = evaluate(
                    model,
                    val_dataset,
                    threshold=threshold,
                    nms_kernel=nms_kernel,
                    top_k=top_k,
                    num_workers=num_workers,
                    print_detail=False)
                pq = panoptic_results['pan_seg']['All']['pq']
                miou = semantic_results['sem_seg']['mIoU']
                # Named mean_ap so the builtin `map` is not shadowed.
                mean_ap = instance_results['ins_seg']['mAP']
                map50 = instance_results['ins_seg']['mAP50']
                logger.info(
                    "[EVAL] PQ: {:.4f}, mIoU: {:.4f}, mAP: {:.4f}, mAP50: {:.4f}"
                    .format(pq, miou, mean_ap, map50))
                model.train()

                if pq > best_pq:
                    best_pq = pq
                    best_model_iter = cur_iter
                    best_model_dir = os.path.join(save_dir, "best_model")
                    paddle.save(model.state_dict(),
                                os.path.join(best_model_dir, 'model.pdparams'))
                logger.info(
                    '[EVAL] The model with the best validation pq ({:.4f}) was saved at iter {}.'
                    .format(best_pq, best_model_iter))

                if use_vdl:
                    log_writer.add_scalar('Evaluate/PQ', pq, cur_iter)
                    log_writer.add_scalar('Evaluate/mIoU', miou, cur_iter)
                    log_writer.add_scalar('Evaluate/mAP', mean_ap, cur_iter)
                    log_writer.add_scalar('Evaluate/mAP50', map50, cur_iter)
            batch_start = time.time()

    # Calculate flops, using the shape of the last processed batch.
    if local_rank == 0 and images is not None:

        def count_syncbn(m, x, y):
            # Custom counter for SyncBatchNorm: two ops per input element.
            x = x[0]
            nelements = x.numel()
            m.total_ops += int(2 * nelements)

        _, c, h, w = images.shape
        paddle.flops(
            model, [1, c, h, w],
            custom_ops={paddle.nn.SyncBatchNorm: count_syncbn})

    # Sleep for half a second to let dataloader release resources.
    time.sleep(0.5)
    if use_vdl:
        log_writer.close()
def evaluate(model,
             eval_dataset,
             num_workers=0,
             print_detail=True,
             save_img=True):
    """
    Launch evaluation and log pixel-level metrics.

    Args:
        model (nn.Layer): A semantic segmentation model.
        eval_dataset (paddle.io.Dataset): Used to read and process validation datasets.
            Its `NUM_CLASSES` attribute sizes the evaluator.
        num_workers (int, optional): Num workers for data loader. Default: 0.
        print_detail (bool, optional): Whether to update the progress bar (every 10th batch, rank 0 only). Default: True.
        save_img (bool, optional): Whether to pass each prediction to `save_imgs` with './output/'. Default: True.

    Returns:
        tuple: (PA, MPA, MIoU, FWIoU) as produced by the evaluator's
        `pixel_accuracy`, `mean_pixel_accuracy`, `mean_iou` and `fwiou`.
    """
    logger.info('Validating')
    evaluator = Eval(eval_dataset.NUM_CLASSES)
    evaluator.reset()

    model.eval()
    parallel_env = paddle.distributed.ParallelEnv()
    nranks = parallel_env.nranks
    local_rank = parallel_env.local_rank
    if nranks > 1:
        # Initialize parallel environment if not done.
        if not paddle.distributed.parallel.parallel_helper._is_parallel_ctx_initialized():
            paddle.distributed.init_parallel_env()

    batch_sampler = paddle.io.DistributedBatchSampler(
        eval_dataset, batch_size=1, shuffle=False, drop_last=True)
    loader = paddle.io.DataLoader(
        eval_dataset,
        batch_sampler=batch_sampler,
        num_workers=num_workers,
        return_list=True,
    )

    if nranks < 2:
        progbar_verbosity = 0
    else:
        progbar_verbosity = 2
    progbar_val = progbar.Progbar(
        target=len(loader), verbose=progbar_verbosity)
    reader_cost_averager = TimeAverager()
    batch_cost_averager = TimeAverager()
    batch_start = time.time()

    with paddle.no_grad():
        for idx, (inputs, targets, _, item) in enumerate(loader):
            reader_cost_averager.record(time.time() - batch_start)

            # Forward
            targets = targets.astype('int64')
            outputs = model(inputs)
            # When the model returns more than one entry, only the first
            # one is evaluated.
            if len(outputs) > 1:
                outputs = outputs[0]

            # Convert to numpy; the prediction is the arg-max over axis 1.
            label = targets.squeeze(axis=1).numpy()
            argpred = np.argmax(outputs.numpy(), axis=1)

            if save_img:
                save_imgs(argpred, item, './output/')

            # Add to evaluator
            evaluator.add_batch(label, argpred)

            batch_cost_averager.record(
                time.time() - batch_start, num_samples=len(label))
            batch_cost = batch_cost_averager.get_average()
            reader_cost = reader_cost_averager.get_average()

            if print_detail and local_rank == 0 and idx % 10 == 0:
                progbar_val.update(idx + 1, [('batch_cost', batch_cost),
                                             ('reader cost', reader_cost)])
            reader_cost_averager.reset()
            batch_cost_averager.reset()
            batch_start = time.time()

    PA = evaluator.pixel_accuracy()
    MPA = evaluator.mean_pixel_accuracy()
    MIoU = evaluator.mean_iou()
    FWIoU = evaluator.fwiou()
    PC = evaluator.mean_precision()
    logger.info(
        'PA1:{:.3f}, MPA1:{:.3f}, MIoU1:{:.3f}, FWIoU1:{:.3f}, PC:{:.3f}'.
        format(PA, MPA, MIoU, FWIoU, PC))

    return PA, MPA, MIoU, FWIoU
def train(model,
          train_dataset,
          val_dataset=None,
          optimizer=None,
          save_dir='output',
          iters=10000,
          batch_size=2,
          resume_model=None,
          save_interval=1000,
          log_iters=10,
          num_workers=0,
          use_vdl=False,
          losses=None,
          keep_checkpoint_max=5):
    """
    Launch training.

    Args:
        model (nn.Layer): A semantic segmentation model.
        train_dataset (paddle.io.Dataset): Used to read and process training datasets.
        val_dataset (paddle.io.Dataset, optional): Used to read and process validation datasets.
        optimizer (paddle.optimizer.Optimizer): The optimizer.
        save_dir (str, optional): The directory for saving the model snapshot. Default: 'output'.
        iters (int, optional): How many iters to train the model. Default: 10000.
        batch_size (int, optional): Mini batch size of one gpu or cpu. Default: 2.
        resume_model (str, optional): The path of resume model.
        save_interval (int, optional): How many iters to save a model snapshot once during training. Default: 1000.
        log_iters (int, optional): Display logging information at every log_iters. Default: 10.
        num_workers (int, optional): Num workers for data loader. Default: 0.
        use_vdl (bool, optional): Whether to record the data to VisualDL during training. Default: False.
        losses (dict): A dict including 'types' and 'coef'. The length of coef should equal to 1 or len(losses['types']).
            The 'types' item is a list of object of paddleseg.models.losses while the 'coef' item is a list of the relevant coefficient.
        keep_checkpoint_max (int, optional): Maximum number of checkpoints to save. Default: 5.

    Note:
        The FLOPs report at the end needs the shape of a training batch, so it
        is skipped when no batch was processed (e.g. resuming from a
        checkpoint whose iteration is already >= ``iters``).
    """
    model.train()
    nranks = paddle.distributed.ParallelEnv().nranks
    local_rank = paddle.distributed.ParallelEnv().local_rank

    start_iter = 0
    if resume_model is not None:
        start_iter = resume(model, optimizer, resume_model)

    # ``save_dir`` must be a directory; a plain file with that name is replaced.
    if not os.path.isdir(save_dir):
        if os.path.exists(save_dir):
            os.remove(save_dir)
        os.makedirs(save_dir)

    if nranks > 1:
        # Initialize parallel environment if not done.
        if not paddle.distributed.parallel.parallel_helper._is_parallel_ctx_initialized(
        ):
            paddle.distributed.init_parallel_env()
        ddp_model = paddle.DataParallel(model)

    batch_sampler = paddle.io.DistributedBatchSampler(
        train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)
    loader = paddle.io.DataLoader(
        train_dataset,
        batch_sampler=batch_sampler,
        num_workers=num_workers,
        return_list=True, )

    if use_vdl:
        from visualdl import LogWriter
        log_writer = LogWriter(save_dir)

    avg_loss = 0.0
    avg_loss_list = []
    iters_per_epoch = len(batch_sampler)
    best_mean_iou = -1.0
    best_model_iter = -1
    reader_cost_averager = TimeAverager()
    batch_cost_averager = TimeAverager()
    save_models = deque()
    batch_start = time.time()
    # Last processed image batch; stays None when the loop body never runs.
    images = None

    iter = start_iter
    while iter < iters:
        for data in loader:
            iter += 1
            if iter > iters:
                break
            reader_cost_averager.record(time.time() - batch_start)
            images = data[0]
            labels = data[1].astype('int64')
            # A third element, when present, is used as the edge labels.
            edges = None
            if len(data) == 3:
                edges = data[2].astype('int64')

            if nranks > 1:
                logits_list = ddp_model(images)
            else:
                logits_list = model(images)
            loss_list = loss_computation(
                logits_list=logits_list,
                labels=labels,
                losses=losses,
                edges=edges)
            loss = sum(loss_list)
            loss.backward()

            optimizer.step()
            # Read the lr before the scheduler advances so the logged value is
            # the one used for this step.
            lr = optimizer.get_lr()
            if isinstance(optimizer._learning_rate,
                          paddle.optimizer.lr.LRScheduler):
                optimizer._learning_rate.step()
            model.clear_gradients()

            avg_loss += loss.numpy()[0]
            if not avg_loss_list:
                avg_loss_list = [item.numpy() for item in loss_list]
            else:
                for i, item in enumerate(loss_list):
                    avg_loss_list[i] += item.numpy()
            batch_cost_averager.record(
                time.time() - batch_start, num_samples=batch_size)

            if (iter) % log_iters == 0 and local_rank == 0:
                avg_loss /= log_iters
                avg_loss_list = [item[0] / log_iters for item in avg_loss_list]
                remain_iters = iters - iter
                avg_train_batch_cost = batch_cost_averager.get_average()
                avg_train_reader_cost = reader_cost_averager.get_average()
                eta = calculate_eta(remain_iters, avg_train_batch_cost)
                logger.info(
                    "[TRAIN] epoch: {}, iter: {}/{}, loss: {:.4f}, lr: {:.6f}, batch_cost: {:.4f}, reader_cost: {:.5f}, ips: {:.4f} samples/sec | ETA {}"
                    .format((iter - 1) // iters_per_epoch + 1, iter, iters,
                            avg_loss, lr, avg_train_batch_cost,
                            avg_train_reader_cost,
                            batch_cost_averager.get_ips_average(), eta))
                if use_vdl:
                    log_writer.add_scalar('Train/loss', avg_loss, iter)
                    # Record all losses if there are more than 2 losses.
                    if len(avg_loss_list) > 1:
                        avg_loss_dict = {}
                        for i, value in enumerate(avg_loss_list):
                            avg_loss_dict['loss_' + str(i)] = value
                        for key, value in avg_loss_dict.items():
                            log_tag = 'Train/' + key
                            log_writer.add_scalar(log_tag, value, iter)

                    log_writer.add_scalar('Train/lr', lr, iter)
                    log_writer.add_scalar('Train/batch_cost',
                                          avg_train_batch_cost, iter)
                    log_writer.add_scalar('Train/reader_cost',
                                          avg_train_reader_cost, iter)
                avg_loss = 0.0
                avg_loss_list = []
                reader_cost_averager.reset()
                batch_cost_averager.reset()

            if (iter % save_interval == 0
                    or iter == iters) and (val_dataset is not None):
                num_workers = 1 if num_workers > 0 else 0
                mean_iou, acc, class_iou, _, _ = evaluate(
                    model, val_dataset, num_workers=num_workers)
                # evaluate() is expected to leave the model in eval mode.
                model.train()

            if (iter % save_interval == 0
                    or iter == iters) and local_rank == 0:
                current_save_dir = os.path.join(save_dir,
                                                "iter_{}".format(iter))
                if not os.path.isdir(current_save_dir):
                    os.makedirs(current_save_dir)
                paddle.save(model.state_dict(),
                            os.path.join(current_save_dir, 'model.pdparams'))
                paddle.save(optimizer.state_dict(),
                            os.path.join(current_save_dir, 'model.pdopt'))
                # Drop the oldest snapshot once more than
                # keep_checkpoint_max are on disk.
                save_models.append(current_save_dir)
                if len(save_models) > keep_checkpoint_max > 0:
                    model_to_remove = save_models.popleft()
                    shutil.rmtree(model_to_remove)

                if val_dataset is not None:
                    if mean_iou > best_mean_iou:
                        best_mean_iou = mean_iou
                        best_model_iter = iter
                        best_model_dir = os.path.join(save_dir, "best_model")
                        paddle.save(
                            model.state_dict(),
                            os.path.join(best_model_dir, 'model.pdparams'))
                    logger.info(
                        '[EVAL] The model with the best validation mIoU ({:.4f}) was saved at iter {}.'
                        .format(best_mean_iou, best_model_iter))

                    if use_vdl:
                        log_writer.add_scalar('Evaluate/mIoU', mean_iou, iter)
                        for i, iou in enumerate(class_iou):
                            log_writer.add_scalar('Evaluate/IoU {}'.format(i),
                                                  float(iou), iter)
                        log_writer.add_scalar('Evaluate/Acc', acc, iter)
            batch_start = time.time()

    # Calculate flops.
    if local_rank == 0 and images is not None:

        def count_syncbn(m, x, y):
            x = x[0]
            nelements = x.numel()
            m.total_ops += int(2 * nelements)

        _, c, h, w = images.shape
        flops = paddle.flops(
            model, [1, c, h, w],
            custom_ops={paddle.nn.SyncBatchNorm: count_syncbn})

    # Sleep for half a second to let dataloader release resources.
    time.sleep(0.5)
    if use_vdl:
        log_writer.close()
def train(self,
          train_dataset_src,
          train_dataset_tgt,
          val_dataset_tgt=None,
          val_dataset_src=None,
          optimizer=None,
          save_dir='output',
          iters=10000,
          batch_size=2,
          resume_model=None,
          save_interval=1000,
          log_iters=10,
          num_workers=0,
          use_vdl=False,
          keep_checkpoint_max=5,
          test_config=None):
    """
    Launch domain-adaptation training on a source and a target dataset.

    Args:
        train_dataset_src (paddle.io.Dataset): Source-domain training dataset.
        train_dataset_tgt (paddle.io.Dataset): Target-domain training dataset. Its
            ``NUM_CLASSES`` attribute is read for the edge and feature losses.
        val_dataset_tgt (paddle.io.Dataset, optional): Used to read and process validation datasets.
        val_dataset_src (paddle.io.Dataset, optional): Source-domain validation dataset, only
            evaluated when ``self.cfg['eval_src']`` is set.
        optimizer (paddle.optimizer.Optimizer): The optimizer.
        save_dir (str, optional): The directory for saving the model snapshot. Default: 'output'.
        iters (int, optional): How many iters to train the model. Default: 10000.
        batch_size (int, optional): Mini batch size of one gpu or cpu. Default: 2.
        resume_model (str, optional): The path of resume model.
        save_interval (int, optional): How many iters to save a model snapshot once during training. Default: 1000.
        log_iters (int, optional): Display logging information at every log_iters. Default: 10.
        num_workers (int, optional): Num workers for data loader. Default: 0.
        use_vdl (bool, optional): Whether to record the data to VisualDL during training. Default: False.
        keep_checkpoint_max (int, optional): Maximum number of checkpoints to save. Default: 5.
        test_config (dict, optional): Evaluation config, forwarded as keyword arguments
            to ``val.evaluate``.

    Note:
        NOTE(review): the nesting of a few statements (``load_ema_model`` under the
        resume branch, the per-class center appends, the center ``detach`` step and
        ``self.ema.update_buffer()`` once per pass over the loaders) was reconstructed
        from a whitespace-collapsed source -- confirm against the upstream file.
    """
    start_iter = 0
    self.model.train()
    nranks = paddle.distributed.ParallelEnv().nranks
    local_rank = paddle.distributed.ParallelEnv().local_rank

    if resume_model is not None:
        logger.info(resume_model)
        start_iter = resume(self.model, optimizer, resume_model)
        load_ema_model(self.model, self.resume_ema)

    # ``save_dir`` must be a directory; a plain file with that name is replaced.
    if not os.path.isdir(save_dir):
        if os.path.exists(save_dir):
            os.remove(save_dir)
        os.makedirs(save_dir)

    if nranks > 1:
        paddle.distributed.fleet.init(is_collective=True)
        optimizer = paddle.distributed.fleet.distributed_optimizer(
            optimizer)  # The return is Fleet object
        ddp_model = paddle.distributed.fleet.distributed_model(self.model)

    batch_sampler_src = paddle.io.DistributedBatchSampler(
        train_dataset_src,
        batch_size=batch_size,
        shuffle=True,
        drop_last=True)
    loader_src = paddle.io.DataLoader(
        train_dataset_src,
        batch_sampler=batch_sampler_src,
        num_workers=num_workers,
        return_list=True,
        worker_init_fn=worker_init_fn, )

    batch_sampler_tgt = paddle.io.DistributedBatchSampler(
        train_dataset_tgt,
        batch_size=batch_size,
        shuffle=True,
        drop_last=True)
    loader_tgt = paddle.io.DataLoader(
        train_dataset_tgt,
        batch_sampler=batch_sampler_tgt,
        num_workers=num_workers,
        return_list=True,
        worker_init_fn=worker_init_fn, )

    if use_vdl:
        from visualdl import LogWriter
        log_writer = LogWriter(save_dir)

    iters_per_epoch = len(batch_sampler_tgt)
    best_mean_iou = -1.0
    best_model_iter = -1
    reader_cost_averager = TimeAverager()
    batch_cost_averager = TimeAverager()
    save_models = deque()
    batch_start = time.time()
    # Last source batch; stays None when the loop body never runs.
    images_src = None

    iter = start_iter
    while iter < iters:
        for _, (data_src, data_tgt) in enumerate(zip(loader_src, loader_tgt)):
            reader_cost_averager.record(time.time() - batch_start)
            loss_dict = {}

            #### training #####
            images_tgt = data_tgt[0]
            labels_tgt = data_tgt[1].astype('int64')
            images_src = data_src[0]
            labels_src = data_src[1].astype('int64')

            edges_src = data_src[2].astype('int64')
            edges_tgt = data_tgt[2].astype('int64')

            if nranks > 1:
                logits_list_src = ddp_model(images_src)
            else:
                logits_list_src = self.model(images_src)

            ##### source seg & edge loss ####
            loss_src_seg_main = self.celoss(logits_list_src[0], labels_src)
            loss_src_seg_aux = 0.1 * self.celoss(logits_list_src[1],
                                                 labels_src)

            loss_src_seg = loss_src_seg_main + loss_src_seg_aux
            loss_dict["source_main"] = loss_src_seg_main.numpy()[0]
            loss_dict["source_aux"] = loss_src_seg_aux.numpy()[0]
            loss = loss_src_seg
            del loss_src_seg, loss_src_seg_aux, loss_src_seg_main

            #### generate target pseudo label ####
            with paddle.no_grad():
                if nranks > 1:
                    logits_list_tgt = ddp_model(images_tgt)
                else:
                    logits_list_tgt = self.model(images_tgt)

                pred_P_1 = F.softmax(logits_list_tgt[0], axis=1)
                labels_tgt_psu = paddle.argmax(pred_P_1.detach(), axis=1)

                # aux label
                pred_P_2 = F.softmax(logits_list_tgt[1], axis=1)
                pred_c = (pred_P_1 + pred_P_2) / 2
                labels_tgt_psu_aux = paddle.argmax(pred_c.detach(), axis=1)

            if self.edgeconstrain:
                loss_src_edge = self.bceloss_src(
                    logits_list_src[2], edges_src)  # 1, 2 640, 1280
                src_edge = paddle.argmax(
                    logits_list_src[2].detach().clone(),
                    axis=1)  # 1, 1, 640,1280
                # Percentage of source edge pixels predicted correctly.
                src_edge_acc = ((src_edge == edges_src).numpy().sum().astype('float32')\
                                    /functools.reduce(lambda a, b: a * b, src_edge.shape))*100

                if (not self.src_only) and (iter > 200000):
                    #### target seg & edge loss ####
                    logger.info("Add target edege loss")
                    edges_tgt = Func.mask_to_binary_edge(
                        labels_tgt_psu.detach().clone().numpy(),
                        radius=2,
                        num_classes=train_dataset_tgt.NUM_CLASSES)
                    edges_tgt = paddle.to_tensor(edges_tgt, dtype='int64')

                    loss_tgt_edge = self.bceloss_tgt(logits_list_tgt[2],
                                                     edges_tgt)
                    loss_edge = loss_tgt_edge + loss_src_edge
                else:
                    loss_tgt_edge = paddle.zeros([1])
                    loss_edge = loss_src_edge

                loss += loss_edge

                loss_dict['target_edge'] = loss_tgt_edge.numpy()[0]
                loss_dict['source_edge'] = loss_src_edge.numpy()[0]

                del loss_edge, loss_tgt_edge, loss_src_edge

            #### target aug loss #######
            augs = augmentation.get_augmentation()
            images_tgt_aug, labels_tgt_aug = augmentation.augment(
                images=images_tgt.cpu(),
                labels=labels_tgt_psu.detach().cpu(),
                aug=augs,
                iters="{}_1".format(iter))
            images_tgt_aug = images_tgt_aug.cuda()
            labels_tgt_aug = labels_tgt_aug.cuda()

            _, labels_tgt_aug_aux = augmentation.augment(
                images=images_tgt.cpu(),
                labels=labels_tgt_psu_aux.detach().cpu(),
                aug=augs,
                iters="{}_2".format(iter))
            labels_tgt_aug_aux = labels_tgt_aug_aux.cuda()

            if nranks > 1:
                logits_list_tgt_aug = ddp_model(images_tgt_aug)
            else:
                logits_list_tgt_aug = self.model(images_tgt_aug)

            loss_tgt_aug_main = 0.1 * (self.celoss(logits_list_tgt_aug[0],
                                                   labels_tgt_aug))
            loss_tgt_aug_aux = 0.1 * (0.1 * self.celoss(
                logits_list_tgt_aug[1], labels_tgt_aug_aux))

            loss_tgt_aug = loss_tgt_aug_aux + loss_tgt_aug_main

            loss += loss_tgt_aug

            loss_dict['target_aug_main'] = loss_tgt_aug_main.numpy()[0]
            loss_dict['target_aug_aux'] = loss_tgt_aug_aux.numpy()[0]
            del images_tgt_aug, labels_tgt_aug_aux, images_tgt, \
                loss_tgt_aug, loss_tgt_aug_aux, loss_tgt_aug_main

            #### edge input seg; src & tgt edge pull in ######
            if self.edgepullin:
                src_edge_logit = logits_list_src[2]
                feat_src = paddle.concat(
                    [logits_list_src[0], src_edge_logit], axis=1).detach()

                out_src = self.model.fusion(feat_src)
                loss_src_edge_rec = self.celoss(out_src, labels_src)

                tgt_edge_logit = logits_list_tgt_aug[2]
                # tgt_edge_logit = paddle.to_tensor(
                #     Func.mask_to_onehot(edges_tgt.squeeze().numpy(), 2)
                # ).unsqueeze(0).astype('float32')
                feat_tgt = paddle.concat(
                    [logits_list_tgt[0], tgt_edge_logit], axis=1).detach()

                out_tgt = self.model.fusion(feat_tgt)
                loss_tgt_edge_rec = self.celoss(out_tgt, labels_tgt)

                loss_edge_rec = loss_tgt_edge_rec + loss_src_edge_rec
                loss += loss_edge_rec

                loss_dict['src_edge_rec'] = loss_src_edge_rec.numpy()[0]
                loss_dict['tgt_edge_rec'] = loss_tgt_edge_rec.numpy()[0]

                del loss_tgt_edge_rec, loss_src_edge_rec

            #### mask input feature & pullin ######
            if self.featurepullin:
                # inner-class loss
                feat_src = logits_list_src[0]
                feat_tgt = logits_list_tgt_aug[0]

                center_src_s, center_tgt_s = [], []

                total_pixs = logits_list_src[0].shape[2] * \
                                logits_list_src[0].shape[3]

                for i in range(train_dataset_tgt.NUM_CLASSES):
                    pred = paddle.argmax(
                        logits_list_src[0].detach().clone(),
                        axis=1).unsqueeze(0)  # 1, 1, 640, 1280
                    sel_num = paddle.sum((pred == i).astype('float32'))
                    # ignore tensor that do not have features in this img
                    if sel_num > 0:
                        feat_sel_src = paddle.where(
                            (pred == i).expand_as(feat_src), feat_src,
                            paddle.zeros(feat_src.shape))
                        center_src = paddle.mean(
                            feat_sel_src, axis=[
                                2, 3
                            ]) / (sel_num / total_pixs)  # 1, C

                        # Moving average of the per-class source center.
                        self.src_centers[i] = 0.99 * self.src_centers[
                            i] + (1 - 0.99) * center_src

                    pred = labels_tgt_aug.unsqueeze(0)  # 1, 1, 512, 512
                    sel_num = paddle.sum((pred == i).astype('float32'))
                    if sel_num > 0:
                        feat_sel_tgt = paddle.where(
                            (pred == i).expand_as(feat_tgt), feat_tgt,
                            paddle.zeros(feat_tgt.shape))
                        center_tgt = paddle.mean(
                            feat_sel_tgt, axis=[
                                2, 3
                            ]) / (sel_num / total_pixs)

                        # Moving average of the per-class target center.
                        self.tgt_centers[i] = 0.99 * self.tgt_centers[
                            i] + (1 - 0.99) * center_tgt
                    # NOTE(review): when a class is absent from this batch the
                    # center of the previously processed class is re-appended
                    # (and the name is unbound if that happens for class 0).
                    center_src_s.append(center_src)
                    center_tgt_s.append(center_tgt)

                if iter >= 3000:  # average center structure alignment
                    src_centers = paddle.concat(self.src_centers, axis=0)
                    tgt_centers = paddle.concat(
                        self.tgt_centers, axis=0)  # 19, 2048

                    relatmat_src = paddle.matmul(
                        src_centers, src_centers,
                        transpose_y=True)  # 19,19
                    relatmat_tgt = paddle.matmul(
                        tgt_centers, tgt_centers, transpose_y=True)

                    loss_intra_relate = self.klloss(relatmat_src, (relatmat_tgt+relatmat_src)/2) \
                                        + self.klloss(relatmat_tgt, (relatmat_tgt+relatmat_src)/2)

                    loss_pix_align_src = self.mseloss(
                        paddle.to_tensor(center_src_s),
                        paddle.to_tensor(self.src_centers).detach().clone())
                    loss_pix_align_tgt = self.mseloss(
                        paddle.to_tensor(center_tgt_s),
                        paddle.to_tensor(self.tgt_centers).detach().clone())

                    loss_feat_align = loss_pix_align_src + loss_pix_align_tgt + loss_intra_relate

                    loss += loss_feat_align

                    loss_dict['loss_pix_align_src'] = \
                        loss_pix_align_src.numpy()[0]
                    loss_dict['loss_pix_align_tgt'] = \
                        loss_pix_align_tgt.numpy()[0]
                    loss_dict['loss_intra_relate'] = \
                        loss_intra_relate.numpy()[0]

                    del loss_pix_align_tgt, loss_pix_align_src, loss_intra_relate,

                # Detach the running centers from the current graph.
                self.tgt_centers = [
                    item.detach().clone() for item in self.tgt_centers
                ]
                self.src_centers = [
                    item.detach().clone() for item in self.src_centers
                ]

            loss.backward()
            del loss
            # From here on ``loss`` is the plain-number sum used for logging.
            loss = sum(loss_dict.values())

            optimizer.step()
            self.ema.update_params()

            with paddle.no_grad():
                ##### log & save #####
                lr = optimizer.get_lr()
                # update lr
                if isinstance(optimizer, paddle.distributed.fleet.Fleet):
                    lr_sche = optimizer.user_defined_optimizer._learning_rate
                else:
                    lr_sche = optimizer._learning_rate
                if isinstance(lr_sche, paddle.optimizer.lr.LRScheduler):
                    lr_sche.step()

                # NOTE(review): this branch reads names that only exist when
                # both self.edgeconstrain and self.edgepullin are enabled.
                if self.cfg['save_edge']:
                    tgt_edge = paddle.argmax(
                        logits_list_tgt_aug[2].detach().clone(),
                        axis=1)  # 1, 1, 640,1280
                    src_feed_gt = paddle.argmax(
                        src_edge_logit.astype('float32'), axis=1)
                    tgt_feed_gt = paddle.argmax(
                        tgt_edge_logit.astype('float32'), axis=1)
                    logger.info('src_feed_gt_{}_{}_{}'.format(
                        src_feed_gt.shape,
                        src_feed_gt.max(), src_feed_gt.min()))
                    # Use the tensor reductions, as for the source map above;
                    # the builtin max()/min() iterate over the first axis.
                    logger.info('tgt_feed_gt_{}_{}_{}'.format(
                        tgt_feed_gt.shape,
                        tgt_feed_gt.max(), tgt_feed_gt.min()))
                    save_edge(src_feed_gt, 'src_feed_gt_{}'.format(iter))
                    save_edge(tgt_feed_gt, 'tgt_feed_gt_{}'.format(iter))
                    save_edge(tgt_edge, 'tgt_pred_{}'.format(iter))
                    save_edge(src_edge,
                              'src_pred_{}_{}'.format(iter, src_edge_acc))
                    save_edge(edges_src, 'src_gt_{}'.format(iter))
                    save_edge(edges_tgt, 'tgt_gt_{}'.format(iter))

                self.model.clear_gradients()

                batch_cost_averager.record(
                    time.time() - batch_start, num_samples=batch_size)

                iter += 1
                if (iter) % log_iters == 0 and local_rank == 0:
                    # Percentage of target pixels whose pseudo label matches
                    # the ground truth.
                    label_tgt_acc = ((labels_tgt == labels_tgt_psu).numpy().sum().astype('float32')\
                                        /functools.reduce(lambda a, b: a * b, labels_tgt_psu.shape))*100

                    remain_iters = iters - iter
                    avg_train_batch_cost = batch_cost_averager.get_average(
                    )
                    avg_train_reader_cost = reader_cost_averager.get_average(
                    )
                    eta = calculate_eta(remain_iters, avg_train_batch_cost)
                    logger.info(
                        "[TRAIN] epoch: {}, iter: {}/{}, loss: {:.4f}, tgt_pix_acc: {:.4f}, lr: {:.6f}, batch_cost: {:.4f}, reader_cost: {:.5f}, ips: {:.4f} samples/sec | ETA {}"
                        .format(
                            (iter - 1) // iters_per_epoch + 1, iter, iters,
                            loss, label_tgt_acc, lr, avg_train_batch_cost,
                            avg_train_reader_cost,
                            batch_cost_averager.get_ips_average(), eta))

                    if use_vdl:
                        log_writer.add_scalar('Train/loss', loss, iter)
                        # Record all losses if there are more than 2 losses.
                        if len(loss_dict) > 1:
                            for name, loss_value in loss_dict.items():
                                log_writer.add_scalar(
                                    'Train/loss_' + name, loss_value, iter)

                        log_writer.add_scalar('Train/lr', lr, iter)
                        log_writer.add_scalar('Train/batch_cost',
                                              avg_train_batch_cost, iter)
                        log_writer.add_scalar('Train/reader_cost',
                                              avg_train_reader_cost, iter)
                        log_writer.add_scalar('Train/tgt_label_acc',
                                              label_tgt_acc, iter)

                    reader_cost_averager.reset()
                    batch_cost_averager.reset()

                # True only when the source dataset was evaluated during this
                # very iteration, so that no missing or stale source metrics
                # are written to VisualDL below.
                src_evaluated = False
                if (iter % save_interval == 0 or iter == iters) and (
                        val_dataset_tgt is not None):
                    num_workers = 4 if num_workers > 0 else 0  # adjust num_worker=4

                    if test_config is None:
                        test_config = {}
                    # Evaluate with the EMA weights, then put the training
                    # weights back.
                    self.ema.apply_shadow()
                    self.ema.model.eval()

                    PA_tgt, _, MIoU_tgt, _ = val.evaluate(
                        self.model,
                        val_dataset_tgt,
                        num_workers=num_workers,
                        **test_config)

                    if (iter % (save_interval * 30)) == 0 \
                        and self.cfg['eval_src']:  # add evaluate on src
                        PA_src, _, MIoU_src, _ = val.evaluate(
                            self.model,
                            val_dataset_src,
                            num_workers=num_workers,
                            **test_config)
                        src_evaluated = True
                        logger.info(
                            '[EVAL] The source mIoU is ({:.4f}) at iter {}.'
                            .format(MIoU_src, iter))

                    self.ema.restore()
                    self.model.train()

                if (iter % save_interval == 0
                        or iter == iters) and local_rank == 0:
                    current_save_dir = os.path.join(
                        save_dir, "iter_{}".format(iter))
                    if not os.path.isdir(current_save_dir):
                        os.makedirs(current_save_dir)
                    paddle.save(
                        self.model.state_dict(),
                        os.path.join(current_save_dir, 'model.pdparams'))
                    paddle.save(
                        self.ema.shadow,
                        os.path.join(current_save_dir, 'model_ema.pdparams'))
                    paddle.save(
                        optimizer.state_dict(),
                        os.path.join(current_save_dir, 'model.pdopt'))
                    # Drop the oldest snapshot once more than
                    # keep_checkpoint_max are on disk.
                    save_models.append(current_save_dir)
                    if len(save_models) > keep_checkpoint_max > 0:
                        model_to_remove = save_models.popleft()
                        shutil.rmtree(model_to_remove)

                    if val_dataset_tgt is not None:
                        if MIoU_tgt > best_mean_iou:
                            best_mean_iou = MIoU_tgt
                            best_model_iter = iter
                            best_model_dir = os.path.join(
                                save_dir, "best_model")
                            paddle.save(
                                self.model.state_dict(),
                                os.path.join(best_model_dir,
                                             'model.pdparams'))

                        logger.info(
                            '[EVAL] The model with the best validation mIoU ({:.4f}) was saved at iter {}.'
                            .format(best_mean_iou, best_model_iter))

                    if use_vdl:
                        log_writer.add_scalar('Evaluate/mIoU', MIoU_tgt, iter)
                        log_writer.add_scalar('Evaluate/PA', PA_tgt, iter)

                        if src_evaluated:
                            log_writer.add_scalar('Evaluate/mIoU_src',
                                                  MIoU_src, iter)
                            log_writer.add_scalar('Evaluate/PA_src', PA_src,
                                                  iter)

                batch_start = time.time()

                # Stop exactly at ``iters`` instead of finishing the current
                # pass over the loaders.
                if iter >= iters:
                    break

        self.ema.update_buffer()

    # # Calculate flops.
    if local_rank == 0 and images_src is not None:

        def count_syncbn(m, x, y):
            x = x[0]
            nelements = x.numel()
            m.total_ops += int(2 * nelements)

        _, c, h, w = images_src.shape
        flops = paddle.flops(
            self.model, [1, c, h, w],
            custom_ops={paddle.nn.SyncBatchNorm: count_syncbn})

    # Sleep for half a second to let dataloader release resources.
    time.sleep(0.5)
    if use_vdl:
        log_writer.close()
def evaluate(model,
             eval_dataset,
             num_workers=0,
             is_view=False,
             save_dir='output',
             print_detail=True):
    """
    Run the model over ``eval_dataset`` and score the dumped predictions.

    Args:
        model (nn.Layer): A semantic segmentation model.
        eval_dataset (paddle.io.Dataset): Validation dataset. Its ``num_classes``,
            ``cut_height``, ``test_gt_json`` and ``transforms`` attributes are read.
        num_workers (int, optional): Num workers for data loader. Default: 0.
        is_view (bool, optional): Whether to visualize results. Default: False.
        save_dir (str, optional): The directory to save the json or visualized results. Default: 'output'.
        print_detail (bool, optional): Whether to print detailed information about the
            evaluation process. Default: True.

    Returns:
        float: The acc of validation datasets.
        float: The fp of validation datasets.
        float: The fn of validation datasets.
    """
    model.eval()
    rank = paddle.distributed.ParallelEnv().local_rank

    data_loader = paddle.io.DataLoader(
        eval_dataset,
        batch_size=4,
        drop_last=False,
        num_workers=num_workers,
        return_list=True, )

    processor = tusimple_processor.TusimpleProcessor(
        num_classes=eval_dataset.num_classes,
        cut_height=eval_dataset.cut_height,
        test_gt_json=eval_dataset.test_gt_json,
        save_dir=save_dir, )

    num_batches = len(data_loader)
    if print_detail:
        start_msg = "Start evaluating (total_samples: {}, total_iters: {})...".format(
            len(eval_dataset), num_batches)
        logger.info(start_msg)
    bar = progbar.Progbar(target=num_batches, verbose=1)

    reader_timer = TimeAverager()
    batch_timer = TimeAverager()
    tick = time.time()

    with paddle.no_grad():
        for step, (im, label, im_path) in enumerate(data_loader):
            reader_timer.record(time.time() - tick)
            label = label.astype('int64')

            # Time the forward pass only; the measured duration is handed to
            # the post-processor as run_time.
            infer_begin = time.time()
            pred = infer.inference(
                model,
                im,
                ori_shape=None,
                transforms=eval_dataset.transforms.transforms)
            infer_elapsed = time.time() - infer_begin

            processor.dump_data_to_json(
                pred[1],
                im_path,
                run_time=infer_elapsed,
                is_dump_json=True,
                is_view=is_view)

            batch_timer.record(time.time() - tick, num_samples=len(label))
            stats = [('batch_cost', batch_timer.get_average()),
                     ('reader cost', reader_timer.get_average())]

            if local_rank_is_zero(rank) and print_detail:
                bar.update(step + 1, stats)

            # Both averagers are per-batch: clear them before the next read.
            reader_timer.reset()
            batch_timer.reset()
            tick = time.time()

    acc, fp, fn, eval_result = processor.bench_one_submit(rank)

    if print_detail:
        logger.info(eval_result)

    return acc, fp, fn


def local_rank_is_zero(rank):
    """Return True when ``rank`` designates the first local process."""
    return rank == 0
class Predictor:
    """Runs a deployed segmentation model on single frames and replaces
    the background of each frame with a given image."""

    def __init__(self, args):
        """Build the preprocessing pipeline, the optical-flow state and the
        inference predictor from the command-line style ``args``."""
        self.cfg = DeployConfig(args.cfg)
        self.args = args
        self.compose = T.Compose(self.cfg.transforms)

        # Optical-flow state, sized after the configured input shape.
        height, width = args.input_shape
        self.disflow = cv2.DISOpticalFlow_create(
            cv2.DISOPTICAL_FLOW_PRESET_ULTRAFAST)
        self.prev_gray = np.zeros((height, width), np.uint8)
        self.prev_cfd = np.zeros((height, width), np.float32)
        self.is_init = True

        predict_config = PredictConfig(self.cfg.model, self.cfg.params)
        predict_config.disable_glog_info()
        if self.args.use_gpu:
            predict_config.enable_use_gpu(100, 0)
        self.predictor = create_predictor(predict_config)

        if self.args.test_speed:
            self.cost_averager = TimeAverager()

    def preprocess(self, img):
        """Apply the configured transforms to ``img``.

        Returns:
            tuple: a one-element list holding the first output of the
            transform pipeline, and a one-element list with ``img.shape``.
        """
        transformed = self.compose(img)[0]
        return [transformed], [img.shape]

    def run(self, img, bg):
        """Run inference on ``img`` and return it composited over ``bg``."""
        first_input = self.predictor.get_input_names()[0]
        input_handle = self.predictor.get_input_handle(first_input)

        processed_imgs, ori_shapes = self.preprocess(img)
        batch = np.array(processed_imgs)
        input_handle.reshape(batch.shape)
        input_handle.copy_from_cpu(batch)

        # Only the predictor call itself is timed.
        if self.args.test_speed:
            start = time.time()
        self.predictor.run()
        if self.args.test_speed:
            self.cost_averager.record(time.time() - start)

        first_output = self.predictor.get_output_names()[0]
        output = self.predictor.get_output_handle(first_output).copy_to_cpu()
        return self.postprocess(output, img, ori_shapes[0], bg)

    def postprocess(self, pred, img, ori_shape, bg):
        """Turn the raw network output into an alpha map and blend ``img``
        with ``bg``.

        Three paths are supported: soft prediction smoothed by optical flow,
        plain soft prediction, and hard prediction (optionally arg-maxed).
        """
        if not os.path.exists(self.args.save_dir):
            os.makedirs(self.args.save_dir)

        out_w = pred.shape[-1]
        out_h = pred.shape[-2]

        if self.args.soft_predict and self.args.use_optic_flow:
            fg_score = 255 * pred[:, 1, :, :].squeeze(0)
            gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            gray = cv2.resize(gray, (out_w, out_h))
            flow_score = optic_flow_process(gray, fg_score, self.prev_gray,
                                            self.prev_cfd, self.disflow,
                                            self.is_init)
            # Remember this frame for the next call.
            self.prev_gray = gray.copy()
            self.prev_cfd = flow_score.copy()
            self.is_init = False

            stacked = np.repeat(flow_score[:, :, np.newaxis], 3, axis=2)
            stacked = np.transpose(stacked, [2, 0, 1])[np.newaxis, ...]
            restored = reverse_transform(
                paddle.to_tensor(stacked),
                ori_shape,
                self.cfg.transforms,
                mode='bilinear')
            # Scores were scaled by 255 above; bring them back to [0, 1].
            alpha = np.transpose(restored.numpy().squeeze(0), [1, 2, 0]) / 255
        elif self.args.soft_predict:
            fg_score = pred[:, 1, :, :][np.newaxis, ...]
            restored = reverse_transform(
                paddle.to_tensor(fg_score),
                ori_shape,
                self.cfg.transforms,
                mode='bilinear')
            alpha = np.transpose(restored.numpy().squeeze(0), [1, 2, 0])
        else:
            if pred.ndim == 3:
                pred = pred[:, np.newaxis, ...]
            restored = reverse_transform(
                paddle.to_tensor(
                    pred, dtype='float32'),
                ori_shape,
                self.cfg.transforms,
                mode='bilinear')
            restored = np.array(restored)
            if self.args.add_argmax:
                label_map = np.argmax(restored, axis=1)
            else:
                label_map = restored.squeeze(1)
            alpha = np.transpose(label_map, [1, 2, 0])

        # background replace
        frame_h, frame_w, _ = img.shape
        bg = cv2.resize(bg, (frame_w, frame_h))
        if bg.ndim == 2:
            bg = bg[..., np.newaxis]

        blended = alpha * img + (1 - alpha) * bg
        return blended.astype(np.uint8)
def evaluate(model,
             eval_dataset,
             threshold=0.1,
             nms_kernel=7,
             top_k=200,
             num_workers=0,
             print_detail=True):
    """
    Launch evaluation.

    Args:
        model (nn.Layer): A semantic segmentation model.
        eval_dataset (paddle.io.Dataset): Used to read and process validation datasets.
        threshold (float, optional): Threshold applied to center heatmap score. Default: 0.1.
        nms_kernel (int, optional): NMS max pooling kernel size. Default: 7.
        top_k (int, optional): Top k centers to keep. Default: 200.
        num_workers (int, optional): Num workers for data loader. Default: 0.
        print_detail (bool, optional): Whether to print detailed information about the
            evaluation process (start message, progress bar and result summary). Default: True.

    Returns:
        dict: Panoptic evaluation results which includes PQ, RQ, SQ for all, each class, Things and stuff.
        dict: Semantic evaluation results which includes mIoU, fwIoU, mACC and pACC.
        dict: Instance evaluation results which includes mAP and mAP50, and also AP and AP50 for each class.
    """
    model.eval()
    nranks = paddle.distributed.ParallelEnv().nranks
    local_rank = paddle.distributed.ParallelEnv().local_rank
    if nranks > 1:
        # Initialize parallel environment if not done.
        if not paddle.distributed.parallel.parallel_helper._is_parallel_ctx_initialized(
        ):
            paddle.distributed.init_parallel_env()

    batch_sampler = paddle.io.DistributedBatchSampler(
        eval_dataset, batch_size=1, shuffle=False, drop_last=False)
    loader = paddle.io.DataLoader(
        eval_dataset,
        batch_sampler=batch_sampler,
        num_workers=num_workers,
        return_list=True, )

    total_iters = len(loader)
    semantic_metric = SemanticEvaluator(
        eval_dataset.num_classes, ignore_index=eval_dataset.ignore_index)
    # Two instance evaluators: one at IoU 0.5, one averaged over 0.5:0.95.
    instance_metric_AP50 = InstanceEvaluator(
        eval_dataset.num_classes,
        overlaps=0.5,
        thing_list=eval_dataset.thing_list)
    instance_metric_AP = InstanceEvaluator(
        eval_dataset.num_classes,
        overlaps=list(np.arange(0.5, 1.0, 0.05)),
        thing_list=eval_dataset.thing_list)
    panoptic_metric = PanopticEvaluator(
        num_classes=eval_dataset.num_classes,
        thing_list=eval_dataset.thing_list,
        ignore_index=eval_dataset.ignore_index,
        label_divisor=eval_dataset.label_divisor)

    if print_detail:
        logger.info(
            "Start evaluating (total_samples={}, total_iters={})...".format(
                len(eval_dataset), total_iters))
    progbar_val = progbar.Progbar(target=total_iters, verbose=1)
    reader_cost_averager = TimeAverager()
    batch_cost_averager = TimeAverager()
    batch_start = time.time()

    with paddle.no_grad():
        for step, data in enumerate(loader):
            reader_cost_averager.record(time.time() - batch_start)
            im = data[0]
            raw_semantic_label = data[1]  # raw semantic label.
            raw_instance_label = data[2]
            raw_panoptic_label = data[3]
            ori_shape = raw_semantic_label.shape[-2:]

            semantic, semantic_softmax, instance, panoptic, ctr_hmp = infer.inference(
                model=model,
                im=im,
                transforms=eval_dataset.transforms.transforms,
                thing_list=eval_dataset.thing_list,
                label_divisor=eval_dataset.label_divisor,
                stuff_area=eval_dataset.stuff_area,
                ignore_index=eval_dataset.ignore_index,
                threshold=threshold,
                nms_kernel=nms_kernel,
                top_k=top_k,
                ori_shape=ori_shape)

            semantic = semantic.squeeze().numpy()
            semantic_softmax = semantic_softmax.squeeze().numpy()
            instance = instance.squeeze().numpy()
            panoptic = panoptic.squeeze().numpy()
            ctr_hmp = ctr_hmp.squeeze().numpy()
            raw_semantic_label = raw_semantic_label.squeeze().numpy()
            raw_instance_label = raw_instance_label.squeeze().numpy()
            raw_panoptic_label = raw_panoptic_label.squeeze().numpy()

            # update metric for semantic, instance, panoptic
            semantic_metric.update(semantic, raw_semantic_label)

            gts = instance_metric_AP.convert_gt_map(raw_semantic_label,
                                                    raw_instance_label)
            preds = instance_metric_AP.convert_pred_map(semantic_softmax,
                                                        panoptic)

            ignore_mask = raw_semantic_label == eval_dataset.ignore_index
            instance_metric_AP.update(preds, gts, ignore_mask=ignore_mask)
            instance_metric_AP50.update(preds, gts, ignore_mask=ignore_mask)

            panoptic_metric.update(panoptic, raw_panoptic_label)

            batch_cost_averager.record(
                time.time() - batch_start, num_samples=len(im))
            batch_cost = batch_cost_averager.get_average()
            reader_cost = reader_cost_averager.get_average()

            # Honour print_detail for the progress bar as well.
            if local_rank == 0 and print_detail:
                progbar_val.update(step + 1, [('batch_cost', batch_cost),
                                              ('reader cost', reader_cost)])
            reader_cost_averager.reset()
            batch_cost_averager.reset()
            batch_start = time.time()

    semantic_results = semantic_metric.evaluate()
    panoptic_results = panoptic_metric.evaluate()
    # Merge both instance evaluators into a single result dict.
    instance_results = OrderedDict()
    ins_ap = instance_metric_AP.evaluate()
    ins_ap50 = instance_metric_AP50.evaluate()
    instance_results['ins_seg'] = OrderedDict()
    instance_results['ins_seg']['mAP'] = ins_ap['ins_seg']['mAP']
    instance_results['ins_seg']['AP'] = ins_ap['ins_seg']['AP']
    instance_results['ins_seg']['mAP50'] = ins_ap50['ins_seg']['mAP']
    instance_results['ins_seg']['AP50'] = ins_ap50['ins_seg']['AP']

    if print_detail:
        logger.info(panoptic_results)
        print()
        logger.info(semantic_results)
        print()
        logger.info(instance_results)
        print()

        pq = panoptic_results['pan_seg']['All']['pq']
        miou = semantic_results['sem_seg']['mIoU']
        mean_ap = instance_results['ins_seg']['mAP']
        mean_ap50 = instance_results['ins_seg']['mAP50']
        logger.info(
            "PQ: {:.4f}, mIoU: {:.4f}, mAP: {:.4f}, mAP50: {:.4f}".format(
                pq, miou, mean_ap, mean_ap50))

    return panoptic_results, semantic_results, instance_results