Esempio n. 1
0
    def __init__(self, args):
        self.cfg = DeployConfig(args.cfg)
        self.args = args
        self.compose = T.Compose(self.cfg.transforms)
        resize_h, resize_w = args.input_shape

        self.disflow = cv2.DISOpticalFlow_create(
            cv2.DISOPTICAL_FLOW_PRESET_ULTRAFAST)
        self.prev_gray = np.zeros((resize_h, resize_w), np.uint8)
        self.prev_cfd = np.zeros((resize_h, resize_w), np.float32)
        self.is_init = True

        pred_cfg = PredictConfig(self.cfg.model, self.cfg.params)
        pred_cfg.disable_glog_info()
        if self.args.use_gpu:
            pred_cfg.enable_use_gpu(100, 0)

        self.predictor = create_predictor(pred_cfg)
        if self.args.test_speed:
            self.cost_averager = TimeAverager()
Esempio n. 2
0
def train(model,
          train_dataset,
          val_dataset=None,
          optimizer=None,
          save_dir='output',
          iters=10000,
          batch_size=2,
          resume_model=None,
          save_interval=1000,
          log_iters=10,
          num_workers=0,
          use_vdl=False,
          losses=None,
          keep_checkpoint_max=5,
          eval_begin_iters=None):
    """
    Launch training.
    Args:
        model(nn.Layer): A matting model.
        train_dataset (paddle.io.Dataset): Used to read and process training datasets.
        val_dataset (paddle.io.Dataset, optional): Used to read and process validation datasets.
        optimizer (paddle.optimizer.Optimizer): The optimizer.
        save_dir (str, optional): The directory for saving the model snapshot. Default: 'output'.
        iters (int, optional): How may iters to train the model. Defualt: 10000.
        batch_size (int, optional): Mini batch size of one gpu or cpu. Default: 2.
        resume_model (str, optional): The path of resume model.
        save_interval (int, optional): How many iters to save a model snapshot once during training. Default: 1000.
        log_iters (int, optional): Display logging information at every log_iters. Default: 10.
        num_workers (int, optional): Num workers for data loader. Default: 0.
        use_vdl (bool, optional): Whether to record the data to VisualDL during training. Default: False.
        losses (dict, optional): A dict of loss, refer to the loss function of the model for details. Default: None.
        keep_checkpoint_max (int, optional): Maximum number of checkpoints to save. Default: 5.
        eval_begin_iters (int): The iters begin evaluation. It will evaluate at iters/2 if it is None. Defalust: None.
    """
    model.train()
    nranks = paddle.distributed.ParallelEnv().nranks
    local_rank = paddle.distributed.ParallelEnv().local_rank

    start_iter = 0
    if resume_model is not None:
        start_iter = resume(model, optimizer, resume_model)

    if not os.path.isdir(save_dir):
        if os.path.exists(save_dir):
            os.remove(save_dir)
        os.makedirs(save_dir)

    if nranks > 1:
        # Initialize parallel environment if not done.
        if not paddle.distributed.parallel.parallel_helper._is_parallel_ctx_initialized(
        ):
            paddle.distributed.init_parallel_env()
            ddp_model = paddle.DataParallel(model)
        else:
            ddp_model = paddle.DataParallel(model)

    batch_sampler = paddle.io.DistributedBatchSampler(train_dataset,
                                                      batch_size=batch_size,
                                                      shuffle=True,
                                                      drop_last=True)

    loader = paddle.io.DataLoader(
        train_dataset,
        batch_sampler=batch_sampler,
        num_workers=num_workers,
        return_list=True,
    )

    if use_vdl:
        from visualdl import LogWriter
        log_writer = LogWriter(save_dir)

    avg_loss = defaultdict(float)
    iters_per_epoch = len(batch_sampler)
    best_sad = np.inf
    best_model_iter = -1
    reader_cost_averager = TimeAverager()
    batch_cost_averager = TimeAverager()
    save_models = deque()
    batch_start = time.time()

    iter = start_iter
    while iter < iters:
        for data in loader:
            iter += 1
            if iter > iters:
                break
            reader_cost_averager.record(time.time() - batch_start)

            # model input
            if nranks > 1:
                logit_dict = ddp_model(data)
            else:
                logit_dict = model(data)
            loss_dict = model.loss(logit_dict, data, losses)

            loss_dict['all'].backward()

            optimizer.step()
            lr = optimizer.get_lr()
            if isinstance(optimizer._learning_rate,
                          paddle.optimizer.lr.LRScheduler):
                optimizer._learning_rate.step()
            model.clear_gradients()

            for key, value in loss_dict.items():
                avg_loss[key] += value.numpy()[0]
            batch_cost_averager.record(time.time() - batch_start,
                                       num_samples=batch_size)

            if (iter) % log_iters == 0 and local_rank == 0:
                for key, value in avg_loss.items():
                    avg_loss[key] = value / log_iters
                remain_iters = iters - iter
                avg_train_batch_cost = batch_cost_averager.get_average()
                avg_train_reader_cost = reader_cost_averager.get_average()
                eta = calculate_eta(remain_iters, avg_train_batch_cost)
                logger.info(
                    "[TRAIN] epoch={}, iter={}/{}, loss={:.4f}, lr={:.6f}, batch_cost={:.4f}, reader_cost={:.5f}, ips={:.4f} samples/sec | ETA {}"
                    .format((iter - 1) // iters_per_epoch + 1, iter, iters,
                            avg_loss['all'], lr, avg_train_batch_cost,
                            avg_train_reader_cost,
                            batch_cost_averager.get_ips_average(), eta))
                # print loss
                loss_str = '[TRAIN] [LOSS] '
                loss_str = loss_str + 'all={:.4f}'.format(avg_loss['all'])
                for key, value in avg_loss.items():
                    if key != 'all':
                        loss_str = loss_str + ' ' + key + '={:.4f}'.format(
                            value)
                logger.info(loss_str)
                if use_vdl:
                    for key, value in avg_loss.items():
                        log_tag = 'Train/' + key
                        log_writer.add_scalar(log_tag, value, iter)

                    log_writer.add_scalar('Train/lr', lr, iter)
                    log_writer.add_scalar('Train/batch_cost',
                                          avg_train_batch_cost, iter)
                    log_writer.add_scalar('Train/reader_cost',
                                          avg_train_reader_cost, iter)

                for key in avg_loss.keys():
                    avg_loss[key] = 0.
                reader_cost_averager.reset()
                batch_cost_averager.reset()

            # save model
            if (iter % save_interval == 0
                    or iter == iters) and local_rank == 0:
                current_save_dir = os.path.join(save_dir,
                                                "iter_{}".format(iter))
                if not os.path.isdir(current_save_dir):
                    os.makedirs(current_save_dir)
                paddle.save(model.state_dict(),
                            os.path.join(current_save_dir, 'model.pdparams'))
                paddle.save(optimizer.state_dict(),
                            os.path.join(current_save_dir, 'model.pdopt'))
                save_models.append(current_save_dir)
                if len(save_models) > keep_checkpoint_max > 0:
                    model_to_remove = save_models.popleft()
                    shutil.rmtree(model_to_remove)

            # eval model
            if eval_begin_iters is None:
                eval_begin_iters = iters // 2
            if (iter % save_interval == 0 or iter == iters) and (
                    val_dataset is not None
            ) and local_rank == 0 and iter >= eval_begin_iters:
                num_workers = 1 if num_workers > 0 else 0
                sad, mse = evaluate(model,
                                    val_dataset,
                                    num_workers=0,
                                    print_detail=True,
                                    save_results=False)
                model.train()

            # save best model and add evaluation results to vdl
            if (iter % save_interval == 0
                    or iter == iters) and local_rank == 0:
                if val_dataset is not None and iter >= eval_begin_iters:
                    if sad < best_sad:
                        best_sad = sad
                        best_model_iter = iter
                        best_model_dir = os.path.join(save_dir, "best_model")
                        paddle.save(
                            model.state_dict(),
                            os.path.join(best_model_dir, 'model.pdparams'))
                    logger.info(
                        '[EVAL] The model with the best validation sad ({:.4f}) was saved at iter {}.'
                        .format(best_sad, best_model_iter))

                    if use_vdl:
                        log_writer.add_scalar('Evaluate/SAD', sad, iter)
                        log_writer.add_scalar('Evaluate/MSE', mse, iter)

            batch_start = time.time()

    # Sleep for half a second to let dataloader release resources.
    time.sleep(0.5)
    if use_vdl:
        log_writer.close()
Esempio n. 3
0
def predict(model,
            model_path,
            transforms,
            image_list,
            image_dir=None,
            trimap_list=None,
            save_dir='output'):
    """
    predict and visualize the image_list.

    Args:
        model (nn.Layer): Used to predict for input image.
        model_path (str): The path of pretrained model.
        transforms (transforms.Compose): Preprocess for input image.
        image_list (list): A list of image path to be predicted.
        image_dir (str, optional): The root directory of the images predicted. Default: None.
        trimap_list (list, optional): A list of trimap of image_list. Default: None.
        save_dir (str, optional): The directory to save the visualized results. Default: 'output'.
    """
    utils.utils.load_entire_model(model, model_path)
    model.eval()
    nranks = paddle.distributed.get_world_size()
    local_rank = paddle.distributed.get_rank()
    if nranks > 1:
        img_lists = partition_list(image_list, nranks)
        trimap_lists = partition_list(
            trimap_list, nranks) if trimap_list is not None else None
    else:
        img_lists = [image_list]
        trimap_lists = [trimap_list] if trimap_list is not None else None

    logger.info("Start to predict...")
    progbar_pred = progbar.Progbar(target=len(img_lists[0]), verbose=1)
    preprocess_cost_averager = TimeAverager()
    infer_cost_averager = TimeAverager()
    postprocess_cost_averager = TimeAverager()
    batch_start = time.time()
    with paddle.no_grad():
        for i, im_path in enumerate(img_lists[local_rank]):
            preprocess_start = time.time()
            trimap = trimap_lists[local_rank][
                i] if trimap_list is not None else None
            data = preprocess(img=im_path, transforms=transforms, trimap=trimap)
            preprocess_cost_averager.record(time.time() - preprocess_start)

            infer_start = time.time()
            alpha_pred = model(data)
            infer_cost_averager.record(time.time() - infer_start)

            postprocess_start = time.time()
            alpha_pred = reverse_transform(alpha_pred, data['trans_info'])
            alpha_pred = (alpha_pred.numpy()).squeeze()
            alpha_pred = (alpha_pred * 255).astype('uint8')

            # get the saved name
            if image_dir is not None:
                im_file = im_path.replace(image_dir, '')
            else:
                im_file = os.path.basename(im_path)
            if im_file[0] == '/' or im_file[0] == '\\':
                im_file = im_file[1:]

            save_path = os.path.join(save_dir, im_file)
            mkdir(save_path)
            save_alpha_pred(alpha_pred, save_path, trimap=trimap)

            postprocess_cost_averager.record(time.time() - postprocess_start)

            preprocess_cost = preprocess_cost_averager.get_average()
            infer_cost = infer_cost_averager.get_average()
            postprocess_cost = postprocess_cost_averager.get_average()
            if local_rank == 0:
                progbar_pred.update(i + 1,
                                    [('preprocess_cost', preprocess_cost),
                                     ('infer_cost cost', infer_cost),
                                     ('postprocess_cost', postprocess_cost)])

            preprocess_cost_averager.reset()
            infer_cost_averager.reset()
            postprocess_cost_averager.reset()
    return alpha_pred
Esempio n. 4
0
def evaluate(model,
             eval_dataset,
             aug_eval=False,
             scales=1.0,
             flip_horizontal=True,
             flip_vertical=False,
             is_slide=False,
             stride=None,
             crop_size=None,
             num_workers=0,
             print_detail=True):
    """
    Launch evalution.

    Args:
        model(nn.Layer): A sementic segmentation model.
        eval_dataset (paddle.io.Dataset): Used to read and process validation datasets.
        aug_eval (bool, optional): Whether to use mulit-scales and flip augment for evaluation. Default: False.
        scales (list|float, optional): Scales for augment. It is valid when `aug_eval` is True. Default: 1.0.
        flip_horizontal (bool, optional): Whether to use flip horizontally augment. It is valid when `aug_eval` is True. Default: True.
        flip_vertical (bool, optional): Whether to use flip vertically augment. It is valid when `aug_eval` is True. Default: False.
        is_slide (bool, optional): Whether to evaluate by sliding window. Default: False.
        stride (tuple|list, optional): The stride of sliding window, the first is width and the second is height.
            It should be provided when `is_slide` is True.
        crop_size (tuple|list, optional):  The crop size of sliding window, the first is width and the second is height.
            It should be provided when `is_slide` is True.
        num_workers (int, optional): Num workers for data loader. Default: 0.
        print_detail (bool, optional): Whether to print detailed information about the evaluation process. Default: True.

    Returns:
        float: The mIoU of validation datasets.
        float: The accuracy of validation datasets.
    """
    model.eval()
    nranks = paddle.distributed.ParallelEnv().nranks
    local_rank = paddle.distributed.ParallelEnv().local_rank
    if nranks > 1:
        # Initialize parallel environment if not done.
        if not paddle.distributed.parallel.parallel_helper._is_parallel_ctx_initialized(
        ):
            paddle.distributed.init_parallel_env()
    batch_sampler = paddle.io.DistributedBatchSampler(eval_dataset,
                                                      batch_size=1,
                                                      shuffle=False,
                                                      drop_last=False)
    loader = paddle.io.DataLoader(
        eval_dataset,
        batch_sampler=batch_sampler,
        num_workers=num_workers,
        return_list=True,
    )

    total_iters = len(loader)
    intersect_area_all = 0
    pred_area_all = 0
    label_area_all = 0

    if print_detail:
        logger.info(
            "Start evaluating (total_samples={}, total_iters={})...".format(
                len(eval_dataset), total_iters))
    progbar_val = progbar.Progbar(target=total_iters, verbose=1)
    reader_cost_averager = TimeAverager()
    batch_cost_averager = TimeAverager()
    batch_start = time.time()
    with paddle.no_grad():
        for iter, (im, label) in enumerate(loader):
            reader_cost_averager.record(time.time() - batch_start)
            label = label.astype('int64')

            ori_shape = label.shape[-2:]
            if aug_eval:
                pred = infer.aug_inference(
                    model,
                    im,
                    ori_shape=ori_shape,
                    transforms=eval_dataset.transforms.transforms,
                    scales=scales,
                    flip_horizontal=flip_horizontal,
                    flip_vertical=flip_vertical,
                    is_slide=is_slide,
                    stride=stride,
                    crop_size=crop_size)
            else:
                pred = infer.inference(
                    model,
                    im,
                    ori_shape=ori_shape,
                    transforms=eval_dataset.transforms.transforms,
                    is_slide=is_slide,
                    stride=stride,
                    crop_size=crop_size)

            intersect_area, pred_area, label_area = metrics.calculate_area(
                pred,
                label,
                eval_dataset.num_classes,
                ignore_index=eval_dataset.ignore_index)

            # Gather from all ranks
            if nranks > 1:
                intersect_area_list = []
                pred_area_list = []
                label_area_list = []
                paddle.distributed.all_gather(intersect_area_list,
                                              intersect_area)
                paddle.distributed.all_gather(pred_area_list, pred_area)
                paddle.distributed.all_gather(label_area_list, label_area)

                # Some image has been evaluated and should be eliminated in last iter
                if (iter + 1) * nranks > len(eval_dataset):
                    valid = len(eval_dataset) - iter * nranks
                    intersect_area_list = intersect_area_list[:valid]
                    pred_area_list = pred_area_list[:valid]
                    label_area_list = label_area_list[:valid]

                for i in range(len(intersect_area_list)):
                    intersect_area_all = intersect_area_all + intersect_area_list[
                        i]
                    pred_area_all = pred_area_all + pred_area_list[i]
                    label_area_all = label_area_all + label_area_list[i]
            else:
                intersect_area_all = intersect_area_all + intersect_area
                pred_area_all = pred_area_all + pred_area
                label_area_all = label_area_all + label_area
            batch_cost_averager.record(time.time() - batch_start,
                                       num_samples=len(label))
            batch_cost = batch_cost_averager.get_average()
            reader_cost = reader_cost_averager.get_average()

            if local_rank == 0 and print_detail:
                progbar_val.update(iter + 1, [('batch_cost', batch_cost),
                                              ('reader cost', reader_cost)])
            reader_cost_averager.reset()
            batch_cost_averager.reset()
            batch_start = time.time()

    class_iou, miou = metrics.mean_iou(intersect_area_all, pred_area_all,
                                       label_area_all)
    class_acc, acc = metrics.accuracy(intersect_area_all, pred_area_all)
    kappa = metrics.kappa(intersect_area_all, pred_area_all, label_area_all)

    if print_detail:
        logger.info(
            "[EVAL] #Images={} mIoU={:.4f} Acc={:.4f} Kappa={:.4f} ".format(
                len(eval_dataset), miou, acc, kappa))
        logger.info("[EVAL] Class IoU: \n" + str(np.round(class_iou, 4)))
        logger.info("[EVAL] Class Acc: \n" + str(np.round(class_acc, 4)))
    return miou, acc
Esempio n. 5
0
def train(model,
          train_dataset,
          val_dataset=None,
          optimizer=None,
          save_dir='output',
          iters=10000,
          batch_size=2,
          resume_model=None,
          save_interval=1000,
          log_iters=10,
          num_workers=0,
          use_vdl=False,
          losses=None,
          keep_checkpoint_max=5,
          test_config=None,
          fp16=False,
          profiler_options=None):
    """
    Launch training.

    Args:
        model(nn.Layer): A sementic segmentation model.
        train_dataset (paddle.io.Dataset): Used to read and process training datasets.
        val_dataset (paddle.io.Dataset, optional): Used to read and process validation datasets.
        optimizer (paddle.optimizer.Optimizer): The optimizer.
        save_dir (str, optional): The directory for saving the model snapshot. Default: 'output'.
        iters (int, optional): How may iters to train the model. Defualt: 10000.
        batch_size (int, optional): Mini batch size of one gpu or cpu. Default: 2.
        resume_model (str, optional): The path of resume model.
        save_interval (int, optional): How many iters to save a model snapshot once during training. Default: 1000.
        log_iters (int, optional): Display logging information at every log_iters. Default: 10.
        num_workers (int, optional): Num workers for data loader. Default: 0.
        use_vdl (bool, optional): Whether to record the data to VisualDL during training. Default: False.
        losses (dict, optional): A dict including 'types' and 'coef'. The length of coef should equal to 1 or len(losses['types']).
            The 'types' item is a list of object of paddleseg.models.losses while the 'coef' item is a list of the relevant coefficient.
        keep_checkpoint_max (int, optional): Maximum number of checkpoints to save. Default: 5.
        test_config(dict, optional): Evaluation config.
        fp16 (bool, optional): Whether to use amp.
        profiler_options (str, optional): The option of train profiler.
    """
    model.train()
    nranks = paddle.distributed.ParallelEnv().nranks
    local_rank = paddle.distributed.ParallelEnv().local_rank

    start_iter = 0
    if resume_model is not None:
        start_iter = resume(model, optimizer, resume_model)

    if not os.path.isdir(save_dir):
        if os.path.exists(save_dir):
            os.remove(save_dir)
        os.makedirs(save_dir)

    if nranks > 1:
        paddle.distributed.fleet.init(is_collective=True)
        optimizer = paddle.distributed.fleet.distributed_optimizer(
            optimizer)  # The return is Fleet object
        ddp_model = paddle.distributed.fleet.distributed_model(model)

    batch_sampler = paddle.io.DistributedBatchSampler(train_dataset,
                                                      batch_size=batch_size,
                                                      shuffle=True,
                                                      drop_last=True)

    loader = paddle.io.DataLoader(
        train_dataset,
        batch_sampler=batch_sampler,
        num_workers=num_workers,
        return_list=True,
        worker_init_fn=worker_init_fn,
    )

    # use amp
    if fp16:
        logger.info('use amp to train')
        scaler = paddle.amp.GradScaler(init_loss_scaling=1024)

    if use_vdl:
        from visualdl import LogWriter
        log_writer = LogWriter(save_dir)

    avg_loss = 0.0
    avg_loss_list = []
    iters_per_epoch = len(batch_sampler)
    best_acc = -1.0
    best_model_iter = -1
    reader_cost_averager = TimeAverager()
    batch_cost_averager = TimeAverager()
    save_models = deque()
    batch_start = time.time()

    iter = start_iter
    while iter < iters:
        for data in loader:
            iter += 1
            if iter > iters:
                version = paddle.__version__
                if version == '2.1.2':
                    continue
                else:
                    break
            reader_cost_averager.record(time.time() - batch_start)
            images = data[0]
            labels = data[1].astype('int64')
            edges = None
            if len(data) == 3:
                edges = data[2].astype('int64')
            if hasattr(model, 'data_format') and model.data_format == 'NHWC':
                images = images.transpose((0, 2, 3, 1))

            if fp16:
                with paddle.amp.auto_cast(
                        enable=True,
                        custom_white_list={
                            "elementwise_add", "batch_norm", "sync_batch_norm"
                        },
                        custom_black_list={'bilinear_interp_v2'}):
                    if nranks > 1:
                        logits_list = ddp_model(images)
                    else:
                        logits_list = model(images)
                    loss_list = loss_computation(logits_list=logits_list,
                                                 labels=labels,
                                                 losses=losses,
                                                 edges=edges)
                    loss = sum(loss_list)

                scaled = scaler.scale(loss)  # scale the loss
                scaled.backward()  # do backward
                if isinstance(optimizer, paddle.distributed.fleet.Fleet):
                    scaler.minimize(optimizer.user_defined_optimizer, scaled)
                else:
                    scaler.minimize(optimizer, scaled)  # update parameters
            else:
                if nranks > 1:
                    logits_list = ddp_model(images)
                else:
                    logits_list = model(images)
                loss_list = loss_computation(logits_list=logits_list,
                                             labels=labels,
                                             losses=losses,
                                             edges=edges)
                loss = sum(loss_list)
                loss.backward()
                optimizer.step()

            lr = optimizer.get_lr()

            # update lr
            if isinstance(optimizer, paddle.distributed.fleet.Fleet):
                lr_sche = optimizer.user_defined_optimizer._learning_rate
            else:
                lr_sche = optimizer._learning_rate
            if isinstance(lr_sche, paddle.optimizer.lr.LRScheduler):
                lr_sche.step()

            train_profiler.add_profiler_step(profiler_options)

            model.clear_gradients()
            avg_loss += loss.numpy()[0]
            if not avg_loss_list:
                avg_loss_list = [l.numpy() for l in loss_list]
            else:
                for i in range(len(loss_list)):
                    avg_loss_list[i] += loss_list[i].numpy()
            batch_cost_averager.record(time.time() - batch_start,
                                       num_samples=batch_size)

            if (iter) % log_iters == 0 and local_rank == 0:
                avg_loss /= log_iters
                avg_loss_list = [l[0] / log_iters for l in avg_loss_list]
                remain_iters = iters - iter
                avg_train_batch_cost = batch_cost_averager.get_average()
                avg_train_reader_cost = reader_cost_averager.get_average()
                eta = calculate_eta(remain_iters, avg_train_batch_cost)
                logger.info(
                    "[TRAIN] epoch: {}, iter: {}/{}, loss: {:.4f}, lr: {:.6f}, batch_cost: {:.4f}, reader_cost: {:.5f}, ips: {:.4f} samples/sec | ETA {}"
                    .format((iter - 1) // iters_per_epoch + 1, iter, iters,
                            avg_loss, lr, avg_train_batch_cost,
                            avg_train_reader_cost,
                            batch_cost_averager.get_ips_average(), eta))
                if use_vdl:
                    log_writer.add_scalar('Train/loss', avg_loss, iter)
                    # Record all losses if there are more than 2 losses.
                    if len(avg_loss_list) > 1:
                        avg_loss_dict = {}
                        for i, value in enumerate(avg_loss_list):
                            avg_loss_dict['loss_' + str(i)] = value
                        for key, value in avg_loss_dict.items():
                            log_tag = 'Train/' + key
                            log_writer.add_scalar(log_tag, value, iter)

                    log_writer.add_scalar('Train/lr', lr, iter)
                    log_writer.add_scalar('Train/batch_cost',
                                          avg_train_batch_cost, iter)
                    log_writer.add_scalar('Train/reader_cost',
                                          avg_train_reader_cost, iter)
                avg_loss = 0.0
                avg_loss_list = []
                reader_cost_averager.reset()
                batch_cost_averager.reset()

            if (iter % save_interval == 0 or iter == iters) and (val_dataset
                                                                 is not None):
                num_workers = 1 if num_workers > 0 else 0

                if test_config is None:
                    test_config = {}

                acc, fp, fn = evaluate(model,
                                       val_dataset,
                                       num_workers=num_workers,
                                       save_dir=save_dir,
                                       **test_config)

                model.train()

            if (iter % save_interval == 0
                    or iter == iters) and local_rank == 0:
                current_save_dir = os.path.join(save_dir,
                                                "iter_{}".format(iter))
                if not os.path.isdir(current_save_dir):
                    os.makedirs(current_save_dir)
                paddle.save(model.state_dict(),
                            os.path.join(current_save_dir, 'model.pdparams'))
                paddle.save(optimizer.state_dict(),
                            os.path.join(current_save_dir, 'model.pdopt'))
                save_models.append(current_save_dir)
                if len(save_models) > keep_checkpoint_max > 0:
                    model_to_remove = save_models.popleft()
                    shutil.rmtree(model_to_remove)

                if val_dataset is not None:
                    if acc > best_acc:
                        best_acc = acc
                        best_model_iter = iter
                        best_model_dir = os.path.join(save_dir, "best_model")
                        paddle.save(
                            model.state_dict(),
                            os.path.join(best_model_dir, 'model.pdparams'))
                    logger.info(
                        '[EVAL] The model with the best validation Acc ({:.4f}) was saved at iter {}.'
                        .format(best_acc, best_model_iter))

                    if use_vdl:
                        log_writer.add_scalar('Evaluate/Acc', acc, iter)
                        log_writer.add_scalar('Evaluate/Fp', fp, iter)
                        log_writer.add_scalar('Evaluate/Fn', fn, iter)
            batch_start = time.time()

    # Calculate flops.
    if local_rank == 0:
        _, c, h, w = images.shape
        _ = paddle.flops(
            model, [1, c, h, w],
            custom_ops={paddle.nn.SyncBatchNorm: op_flops_funs.count_syncbn})

    # Sleep for half a second to let dataloader release resources.
    time.sleep(0.5)
    if use_vdl:
        log_writer.close()
Esempio n. 6
0
def train(model,
          train_dataset,
          val_dataset=None,
          optimizer=None,
          save_dir='output',
          iters=10000,
          batch_size=2,
          resume_model=None,
          save_interval=1000,
          log_iters=10,
          num_workers=0,
          use_vdl=False,
          losses=None,
          keep_checkpoint_max=5,
          threshold=0.1,
          nms_kernel=7,
          top_k=200):
    """
    Launch training.

    Args:
        model(nn.Layer): A sementic segmentation model.
        train_dataset (paddle.io.Dataset): Used to read and process training datasets.
        val_dataset (paddle.io.Dataset, optional): Used to read and process validation datasets.
        optimizer (paddle.optimizer.Optimizer): The optimizer.
        save_dir (str, optional): The directory for saving the model snapshot. Default: 'output'.
        iters (int, optional): How may iters to train the model. Defualt: 10000.
        batch_size (int, optional): Mini batch size of one gpu or cpu. Default: 2.
        resume_model (str, optional): The path of resume model.
        save_interval (int, optional): How many iters to save a model snapshot once during training. Default: 1000.
        log_iters (int, optional): Display logging information at every log_iters. Default: 10.
        num_workers (int, optional): Num workers for data loader. Default: 0.
        use_vdl (bool, optional): Whether to record the data to VisualDL during training. Default: False.
        losses (dict): A dict including 'types' and 'coef'. The length of coef should equal to 1 or len(losses['types']).
            The 'types' item is a list of object of paddleseg.models.losses while the 'coef' item is a list of the relevant coefficient.
        keep_checkpoint_max (int, optional): Maximum number of checkpoints to save. Default: 5.
        threshold (float, optional): A Float, threshold applied to center heatmap score. Default: 0.1.
        nms_kernel (int, optional): An Integer, NMS max pooling kernel size. Default: 7.
        top_k (int, optional): An Integer, top k centers to keep. Default: 200.
    """
    model.train()
    nranks = paddle.distributed.ParallelEnv().nranks
    local_rank = paddle.distributed.ParallelEnv().local_rank

    start_iter = 0
    if resume_model is not None:
        start_iter = resume(model, optimizer, resume_model)

    if not os.path.isdir(save_dir):
        if os.path.exists(save_dir):
            os.remove(save_dir)
        os.makedirs(save_dir)

    if nranks > 1:
        # Initialize parallel environment if not done.
        if not paddle.distributed.parallel.parallel_helper._is_parallel_ctx_initialized(
        ):
            paddle.distributed.init_parallel_env()
            ddp_model = paddle.DataParallel(model)
        else:
            ddp_model = paddle.DataParallel(model)

    batch_sampler = paddle.io.DistributedBatchSampler(train_dataset,
                                                      batch_size=batch_size,
                                                      shuffle=True,
                                                      drop_last=True)

    loader = paddle.io.DataLoader(
        train_dataset,
        batch_sampler=batch_sampler,
        num_workers=num_workers,
        return_list=True,
    )

    if use_vdl:
        from visualdl import LogWriter
        log_writer = LogWriter(save_dir)

    avg_loss = 0.0
    avg_loss_list = []
    iters_per_epoch = len(batch_sampler)
    best_pq = -1.0
    best_model_iter = -1
    reader_cost_averager = TimeAverager()
    batch_cost_averager = TimeAverager()
    save_models = deque()
    batch_start = time.time()

    iter = start_iter
    while iter < iters:
        for data in loader:
            iter += 1
            if iter > iters:
                break
            reader_cost_averager.record(time.time() - batch_start)
            images = data[0]
            semantic = data[1]
            semantic_weights = data[2]
            center = data[3]
            center_weights = data[4]
            offset = data[5]
            offset_weights = data[6]
            foreground = data[7]

            if nranks > 1:
                logits_list = ddp_model(images)
            else:
                logits_list = model(images)

            loss_list = loss_computation(logits_list=logits_list,
                                         losses=losses,
                                         semantic=semantic,
                                         semantic_weights=semantic_weights,
                                         center=center,
                                         center_weights=center_weights,
                                         offset=offset,
                                         offset_weights=offset_weights)
            loss = sum(loss_list)
            loss.backward()

            optimizer.step()
            lr = optimizer.get_lr()
            if isinstance(optimizer._learning_rate,
                          paddle.optimizer.lr.LRScheduler):
                optimizer._learning_rate.step()
            model.clear_gradients()
            avg_loss += loss.numpy()[0]
            if not avg_loss_list:
                avg_loss_list = [l.numpy() for l in loss_list]
            else:
                for i in range(len(loss_list)):
                    avg_loss_list[i] += loss_list[i].numpy()
            batch_cost_averager.record(time.time() - batch_start,
                                       num_samples=batch_size)

            if (iter) % log_iters == 0 and local_rank == 0:
                avg_loss /= log_iters
                avg_loss_list = [l[0] / log_iters for l in avg_loss_list]
                remain_iters = iters - iter
                avg_train_batch_cost = batch_cost_averager.get_average()
                avg_train_reader_cost = reader_cost_averager.get_average()
                eta = calculate_eta(remain_iters, avg_train_batch_cost)
                logger.info(
                    "[TRAIN] epoch={}, iter={}/{}, loss={:.4f}, lr={:.6f}, batch_cost={:.4f}, reader_cost={:.5f}, ips={:.4f} samples/sec | ETA {}"
                    .format((iter - 1) // iters_per_epoch + 1, iter, iters,
                            avg_loss, lr, avg_train_batch_cost,
                            avg_train_reader_cost,
                            batch_cost_averager.get_ips_average(), eta))
                logger.info(
                    "[LOSS] loss={:.4f}, semantic_loss={:.4f}, center_loss={:.4f}, offset_loss={:.4f}"
                    .format(avg_loss, avg_loss_list[0], avg_loss_list[1],
                            avg_loss_list[2]))
                if use_vdl:
                    log_writer.add_scalar('Train/loss', avg_loss, iter)
                    # Record all losses if there are more than 2 losses.
                    if len(avg_loss_list) > 1:
                        avg_loss_dict = {}
                        for i, value in enumerate(avg_loss_list):
                            avg_loss_dict['loss_' + str(i)] = value
                        for key, value in avg_loss_dict.items():
                            log_tag = 'Train/' + key
                            log_writer.add_scalar(log_tag, value, iter)

                    log_writer.add_scalar('Train/lr', lr, iter)
                    log_writer.add_scalar('Train/batch_cost',
                                          avg_train_batch_cost, iter)
                    log_writer.add_scalar('Train/reader_cost',
                                          avg_train_reader_cost, iter)

                avg_loss = 0.0
                avg_loss_list = []
                reader_cost_averager.reset()
                batch_cost_averager.reset()

            # save model
            if (iter % save_interval == 0
                    or iter == iters) and local_rank == 0:
                current_save_dir = os.path.join(save_dir,
                                                "iter_{}".format(iter))
                if not os.path.isdir(current_save_dir):
                    os.makedirs(current_save_dir)
                paddle.save(model.state_dict(),
                            os.path.join(current_save_dir, 'model.pdparams'))
                paddle.save(optimizer.state_dict(),
                            os.path.join(current_save_dir, 'model.pdopt'))
                save_models.append(current_save_dir)
                if len(save_models) > keep_checkpoint_max > 0:
                    model_to_remove = save_models.popleft()
                    shutil.rmtree(model_to_remove)

            # eval model
            if (iter % save_interval == 0 or iter == iters) and (
                    val_dataset
                    is not None) and local_rank == 0 and iter > iters // 2:
                num_workers = 1 if num_workers > 0 else 0
                panoptic_results, semantic_results, instance_results = evaluate(
                    model,
                    val_dataset,
                    threshold=threshold,
                    nms_kernel=nms_kernel,
                    top_k=top_k,
                    num_workers=num_workers,
                    print_detail=False)
                pq = panoptic_results['pan_seg']['All']['pq']
                miou = semantic_results['sem_seg']['mIoU']
                map = instance_results['ins_seg']['mAP']
                map50 = instance_results['ins_seg']['mAP50']
                logger.info(
                    "[EVAL] PQ: {:.4f}, mIoU: {:.4f}, mAP: {:.4f}, mAP50: {:.4f}"
                    .format(pq, miou, map, map50))
                model.train()

            # save best model and add evaluate results to vdl
            if (iter % save_interval == 0
                    or iter == iters) and local_rank == 0:
                if val_dataset is not None and iter > iters // 2:
                    if pq > best_pq:
                        best_pq = pq
                        best_model_iter = iter
                        best_model_dir = os.path.join(save_dir, "best_model")
                        paddle.save(
                            model.state_dict(),
                            os.path.join(best_model_dir, 'model.pdparams'))
                    logger.info(
                        '[EVAL] The model with the best validation pq ({:.4f}) was saved at iter {}.'
                        .format(best_pq, best_model_iter))

                    if use_vdl:
                        log_writer.add_scalar('Evaluate/PQ', pq, iter)
                        log_writer.add_scalar('Evaluate/mIoU', miou, iter)
                        log_writer.add_scalar('Evaluate/mAP', map, iter)
                        log_writer.add_scalar('Evaluate/mAP50', map50, iter)
            batch_start = time.time()

    # Calculate flops.
    if local_rank == 0:

        def count_syncbn(m, x, y):
            x = x[0]
            nelements = x.numel()
            m.total_ops += int(2 * nelements)

        _, c, h, w = images.shape
        flops = paddle.flops(
            model, [1, c, h, w],
            custom_ops={paddle.nn.SyncBatchNorm: count_syncbn})

    # Sleep for half a second to let dataloader release resources.
    time.sleep(0.5)
    if use_vdl:
        log_writer.close()
Esempio n. 7
0
def evaluate(model,
             eval_dataset,
             num_workers=0,
             print_detail=True,
             save_img=True):
    """
    Launch evalution.

    Args:
        model(nn.Layer): A sementic segmentation model.
        eval_dataset (paddle.io.Dataset): Used to read and process validation datasets.
        num_workers (int, optional): Num workers for data loader. Default: 0.
        print_detail (bool, optional): Whether to print detailed information about the evaluation process. Default: True.

    Returns:
        float: The mIoU of validation datasets.
        float: The accuracy of validation datasets.
    """
    logger.info('Validating')
    evaluator = Eval(eval_dataset.NUM_CLASSES)
    evaluator.reset()

    model.eval()

    nranks = paddle.distributed.ParallelEnv().nranks
    local_rank = paddle.distributed.ParallelEnv().local_rank
    if nranks > 1:
        # Initialize parallel environment if not done.
        if not paddle.distributed.parallel.parallel_helper._is_parallel_ctx_initialized(
        ):
            paddle.distributed.init_parallel_env()

    batch_sampler = paddle.io.DistributedBatchSampler(
        eval_dataset, batch_size=1, shuffle=False, drop_last=True)
    loader = paddle.io.DataLoader(
        eval_dataset,
        batch_sampler=batch_sampler,
        num_workers=num_workers,
        return_list=True,
    )

    progbar_val = progbar.Progbar(
        target=len(loader), verbose=0 if nranks < 2 else 2)

    reader_cost_averager = TimeAverager()
    batch_cost_averager = TimeAverager()
    batch_start = time.time()

    with paddle.no_grad():
        for idx, (x, y, _, item) in enumerate(loader):
            reader_cost_averager.record(time.time() - batch_start)

            # Forward
            y = y.astype('int64')
            pred = model(x)  # 1, c, h, w
            if len(pred) > 1:
                pred = pred[0]

            # Convert to numpy
            label = y.squeeze(axis=1).numpy()  #
            argpred = np.argmax(pred.numpy(), axis=1)  # 1, 1, H, W
            if save_img:
                save_imgs(argpred, item, './output/')

            # Add to evaluator
            evaluator.add_batch(label, argpred)

            batch_cost_averager.record(
                time.time() - batch_start, num_samples=len(label))
            batch_cost = batch_cost_averager.get_average()
            reader_cost = reader_cost_averager.get_average()

            if local_rank == 0 and print_detail and idx % 10 == 0:
                progbar_val.update(idx + 1, [('batch_cost', batch_cost),
                                             ('reader cost', reader_cost)])
            reader_cost_averager.reset()
            batch_cost_averager.reset()
            batch_start = time.time()

        PA = evaluator.pixel_accuracy()
        MPA = evaluator.mean_pixel_accuracy()
        MIoU = evaluator.mean_iou()
        FWIoU = evaluator.fwiou()
        PC = evaluator.mean_precision()
        logger.info(
            'PA1:{:.3f}, MPA1:{:.3f}, MIoU1:{:.3f}, FWIoU1:{:.3f}, PC:{:.3f}'.
            format(PA, MPA, MIoU, FWIoU, PC))

    return PA, MPA, MIoU, FWIoU
Esempio n. 8
0
def train(model,
          train_dataset,
          val_dataset=None,
          optimizer=None,
          save_dir='output',
          iters=10000,
          batch_size=2,
          resume_model=None,
          save_interval=1000,
          log_iters=10,
          num_workers=0,
          use_vdl=False,
          losses=None,
          keep_checkpoint_max=5):
    """
    Launch training.
    Args:
        model(nn.Layer): A sementic segmentation model.
        train_dataset (paddle.io.Dataset): Used to read and process training datasets.
        val_dataset (paddle.io.Dataset, optional): Used to read and process validation datasets.
        optimizer (paddle.optimizer.Optimizer): The optimizer.
        save_dir (str, optional): The directory for saving the model snapshot. Default: 'output'.
        iters (int, optional): How may iters to train the model. Defualt: 10000.
        batch_size (int, optional): Mini batch size of one gpu or cpu. Default: 2.
        resume_model (str, optional): The path of resume model.
        save_interval (int, optional): How many iters to save a model snapshot once during training. Default: 1000.
        log_iters (int, optional): Display logging information at every log_iters. Default: 10.
        num_workers (int, optional): Num workers for data loader. Default: 0.
        use_vdl (bool, optional): Whether to record the data to VisualDL during training. Default: False.
        losses (dict): A dict including 'types' and 'coef'. The length of coef should equal to 1 or len(losses['types']).
            The 'types' item is a list of object of paddleseg.models.losses while the 'coef' item is a list of the relevant coefficient.
        keep_checkpoint_max (int, optional): Maximum number of checkpoints to save. Default: 5.
    """
    model.train()
    nranks = paddle.distributed.ParallelEnv().nranks
    local_rank = paddle.distributed.ParallelEnv().local_rank

    start_iter = 0
    if resume_model is not None:
        start_iter = resume(model, optimizer, resume_model)

    if not os.path.isdir(save_dir):
        if os.path.exists(save_dir):
            os.remove(save_dir)
        os.makedirs(save_dir)

    if nranks > 1:
        # Initialize parallel environment if not done.
        if not paddle.distributed.parallel.parallel_helper._is_parallel_ctx_initialized(
        ):
            paddle.distributed.init_parallel_env()
            ddp_model = paddle.DataParallel(model)
        else:
            ddp_model = paddle.DataParallel(model)

    batch_sampler = paddle.io.DistributedBatchSampler(train_dataset,
                                                      batch_size=batch_size,
                                                      shuffle=True,
                                                      drop_last=True)

    loader = paddle.io.DataLoader(
        train_dataset,
        batch_sampler=batch_sampler,
        num_workers=num_workers,
        return_list=True,
    )

    if use_vdl:
        from visualdl import LogWriter
        log_writer = LogWriter(save_dir)

    avg_loss = 0.0
    avg_loss_list = []
    iters_per_epoch = len(batch_sampler)
    best_mean_iou = -1.0
    best_model_iter = -1
    reader_cost_averager = TimeAverager()
    batch_cost_averager = TimeAverager()
    save_models = deque()
    batch_start = time.time()

    iter = start_iter
    while iter < iters:
        for data in loader:
            iter += 1
            if iter > iters:
                break
            reader_cost_averager.record(time.time() - batch_start)
            images = data[0]
            labels = data[1].astype('int64')
            edges = None
            if len(data) == 3:
                edges = data[2].astype('int64')

            if nranks > 1:
                logits_list = ddp_model(images)
            else:
                logits_list = model(images)
            loss_list = loss_computation(logits_list=logits_list,
                                         labels=labels,
                                         losses=losses,
                                         edges=edges)
            loss = sum(loss_list)
            loss.backward()

            optimizer.step()
            lr = optimizer.get_lr()
            if isinstance(optimizer._learning_rate,
                          paddle.optimizer.lr.LRScheduler):
                optimizer._learning_rate.step()
            model.clear_gradients()
            avg_loss += loss.numpy()[0]
            if not avg_loss_list:
                avg_loss_list = [l.numpy() for l in loss_list]
            else:
                for i in range(len(loss_list)):
                    avg_loss_list[i] += loss_list[i].numpy()
            batch_cost_averager.record(time.time() - batch_start,
                                       num_samples=batch_size)

            if (iter) % log_iters == 0 and local_rank == 0:
                avg_loss /= log_iters
                avg_loss_list = [l[0] / log_iters for l in avg_loss_list]
                remain_iters = iters - iter
                avg_train_batch_cost = batch_cost_averager.get_average()
                avg_train_reader_cost = reader_cost_averager.get_average()
                eta = calculate_eta(remain_iters, avg_train_batch_cost)
                logger.info(
                    "[TRAIN] epoch: {}, iter: {}/{}, loss: {:.4f}, lr: {:.6f}, batch_cost: {:.4f}, reader_cost: {:.5f}, ips: {:.4f} samples/sec | ETA {}"
                    .format((iter - 1) // iters_per_epoch + 1, iter, iters,
                            avg_loss, lr, avg_train_batch_cost,
                            avg_train_reader_cost,
                            batch_cost_averager.get_ips_average(), eta))
                if use_vdl:
                    log_writer.add_scalar('Train/loss', avg_loss, iter)
                    # Record all losses if there are more than 2 losses.
                    if len(avg_loss_list) > 1:
                        avg_loss_dict = {}
                        for i, value in enumerate(avg_loss_list):
                            avg_loss_dict['loss_' + str(i)] = value
                        for key, value in avg_loss_dict.items():
                            log_tag = 'Train/' + key
                            log_writer.add_scalar(log_tag, value, iter)

                    log_writer.add_scalar('Train/lr', lr, iter)
                    log_writer.add_scalar('Train/batch_cost',
                                          avg_train_batch_cost, iter)
                    log_writer.add_scalar('Train/reader_cost',
                                          avg_train_reader_cost, iter)
                avg_loss = 0.0
                avg_loss_list = []
                reader_cost_averager.reset()
                batch_cost_averager.reset()

            if (iter % save_interval == 0 or iter == iters) and (val_dataset
                                                                 is not None):
                num_workers = 1 if num_workers > 0 else 0
                mean_iou, acc, class_iou, _, _ = evaluate(
                    model, val_dataset, num_workers=num_workers)
                model.train()

            if (iter % save_interval == 0
                    or iter == iters) and local_rank == 0:
                current_save_dir = os.path.join(save_dir,
                                                "iter_{}".format(iter))
                if not os.path.isdir(current_save_dir):
                    os.makedirs(current_save_dir)
                paddle.save(model.state_dict(),
                            os.path.join(current_save_dir, 'model.pdparams'))
                paddle.save(optimizer.state_dict(),
                            os.path.join(current_save_dir, 'model.pdopt'))
                save_models.append(current_save_dir)
                if len(save_models) > keep_checkpoint_max > 0:
                    model_to_remove = save_models.popleft()
                    shutil.rmtree(model_to_remove)

                if val_dataset is not None:
                    if mean_iou > best_mean_iou:
                        best_mean_iou = mean_iou
                        best_model_iter = iter
                        best_model_dir = os.path.join(save_dir, "best_model")
                        paddle.save(
                            model.state_dict(),
                            os.path.join(best_model_dir, 'model.pdparams'))
                    logger.info(
                        '[EVAL] The model with the best validation mIoU ({:.4f}) was saved at iter {}.'
                        .format(best_mean_iou, best_model_iter))

                    if use_vdl:
                        log_writer.add_scalar('Evaluate/mIoU', mean_iou, iter)
                        for i, iou in enumerate(class_iou):
                            log_writer.add_scalar('Evaluate/IoU {}'.format(i),
                                                  float(iou), iter)

                        log_writer.add_scalar('Evaluate/Acc', acc, iter)
            batch_start = time.time()

    # Calculate flops.
    if local_rank == 0:

        def count_syncbn(m, x, y):
            x = x[0]
            nelements = x.numel()
            m.total_ops += int(2 * nelements)

        _, c, h, w = images.shape
        flops = paddle.flops(
            model, [1, c, h, w],
            custom_ops={paddle.nn.SyncBatchNorm: count_syncbn})

    # Sleep for half a second to let dataloader release resources.
    time.sleep(0.5)
    if use_vdl:
        log_writer.close()
Esempio n. 9
0
    def train(self,
              train_dataset_src,
              train_dataset_tgt,
              val_dataset_tgt=None,
              val_dataset_src=None,
              optimizer=None,
              save_dir='output',
              iters=10000,
              batch_size=2,
              resume_model=None,
              save_interval=1000,
              log_iters=10,
              num_workers=0,
              use_vdl=False,
              keep_checkpoint_max=5,
              test_config=None):
        """
        Launch training.

        Args:
            train_dataset (paddle.io.Dataset): Used to read and process training datasets.
            val_dataset_tgt (paddle.io.Dataset, optional): Used to read and process validation datasets.
            optimizer (paddle.optimizer.Optimizer): The optimizer.
            save_dir (str, optional): The directory for saving the model snapshot. Default: 'output'.
            iters (int, optional): How may iters to train the model. Defualt: 10000.
            batch_size (int, optional): Mini batch size of one gpu or cpu. Default: 2.
            resume_model (str, optional): The path of resume model.
            save_interval (int, optional): How many iters to save a model snapshot once during training. Default: 1000.
            log_iters (int, optional): Display logging information at every log_iters. Default: 10.
            num_workers (int, optional): Num workers for data loader. Default: 0.
            use_vdl (bool, optional): Whether to record the data to VisualDL during training. Default: False.
            keep_checkpoint_max (int, optional): Maximum number of checkpoints to save. Default: 5.
            test_config(dict, optional): Evaluation config.
        """
        start_iter = 0
        self.model.train()
        nranks = paddle.distributed.ParallelEnv().nranks
        local_rank = paddle.distributed.ParallelEnv().local_rank

        if resume_model is not None:
            logger.info(resume_model)
            start_iter = resume(self.model, optimizer, resume_model)
        load_ema_model(self.model, self.resume_ema)

        if not os.path.isdir(save_dir):
            if os.path.exists(save_dir):
                os.remove(save_dir)
            os.makedirs(save_dir)

        if nranks > 1:
            paddle.distributed.fleet.init(is_collective=True)
            optimizer = paddle.distributed.fleet.distributed_optimizer(
                optimizer)  # The return is Fleet object
            ddp_model = paddle.distributed.fleet.distributed_model(self.model)

        batch_sampler_src = paddle.io.DistributedBatchSampler(
            train_dataset_src,
            batch_size=batch_size,
            shuffle=True,
            drop_last=True)

        loader_src = paddle.io.DataLoader(
            train_dataset_src,
            batch_sampler=batch_sampler_src,
            num_workers=num_workers,
            return_list=True,
            worker_init_fn=worker_init_fn,
        )
        batch_sampler_tgt = paddle.io.DistributedBatchSampler(
            train_dataset_tgt,
            batch_size=batch_size,
            shuffle=True,
            drop_last=True)

        loader_tgt = paddle.io.DataLoader(
            train_dataset_tgt,
            batch_sampler=batch_sampler_tgt,
            num_workers=num_workers,
            return_list=True,
            worker_init_fn=worker_init_fn,
        )

        if use_vdl:
            from visualdl import LogWriter
            log_writer = LogWriter(save_dir)

        iters_per_epoch = len(batch_sampler_tgt)
        best_mean_iou = -1.0
        best_model_iter = -1
        reader_cost_averager = TimeAverager()
        batch_cost_averager = TimeAverager()
        save_models = deque()
        batch_start = time.time()

        iter = start_iter
        while iter < iters:
            for _, (data_src,
                    data_tgt) in enumerate(zip(loader_src, loader_tgt)):

                reader_cost_averager.record(time.time() - batch_start)
                loss_dict = {}

                #### training #####
                images_tgt = data_tgt[0]
                labels_tgt = data_tgt[1].astype('int64')
                images_src = data_src[0]
                labels_src = data_src[1].astype('int64')

                edges_src = data_src[2].astype('int64')
                edges_tgt = data_tgt[2].astype('int64')

                if nranks > 1:
                    logits_list_src = ddp_model(images_src)
                else:
                    logits_list_src = self.model(images_src)

                ##### source seg & edge loss ####
                loss_src_seg_main = self.celoss(logits_list_src[0], labels_src)
                loss_src_seg_aux = 0.1 * self.celoss(logits_list_src[1],
                                                     labels_src)

                loss_src_seg = loss_src_seg_main + loss_src_seg_aux
                loss_dict["source_main"] = loss_src_seg_main.numpy()[0]
                loss_dict["source_aux"] = loss_src_seg_aux.numpy()[0]
                loss = loss_src_seg
                del loss_src_seg, loss_src_seg_aux, loss_src_seg_main

                #### generate target pseudo label  ####
                with paddle.no_grad():
                    if nranks > 1:
                        logits_list_tgt = ddp_model(images_tgt)
                    else:
                        logits_list_tgt = self.model(images_tgt)

                    pred_P_1 = F.softmax(logits_list_tgt[0], axis=1)
                    labels_tgt_psu = paddle.argmax(pred_P_1.detach(), axis=1)

                    # aux label
                    pred_P_2 = F.softmax(logits_list_tgt[1], axis=1)
                    pred_c = (pred_P_1 + pred_P_2) / 2
                    labels_tgt_psu_aux = paddle.argmax(pred_c.detach(), axis=1)

                if self.edgeconstrain:
                    loss_src_edge = self.bceloss_src(
                        logits_list_src[2], edges_src)  # 1, 2 640, 1280
                    src_edge = paddle.argmax(
                        logits_list_src[2].detach().clone(),
                        axis=1)  # 1, 1, 640,1280
                    src_edge_acc = ((src_edge == edges_src).numpy().sum().astype('float32')\
                                                /functools.reduce(lambda a, b: a * b, src_edge.shape))*100

                    if (not self.src_only) and (iter > 200000):
                        ####  target seg & edge loss ####
                        logger.info("Add target edege loss")
                        edges_tgt = Func.mask_to_binary_edge(
                            labels_tgt_psu.detach().clone().numpy(),
                            radius=2,
                            num_classes=train_dataset_tgt.NUM_CLASSES)
                        edges_tgt = paddle.to_tensor(edges_tgt, dtype='int64')

                        loss_tgt_edge = self.bceloss_tgt(
                            logits_list_tgt[2], edges_tgt)
                        loss_edge = loss_tgt_edge + loss_src_edge
                    else:
                        loss_tgt_edge = paddle.zeros([1])
                        loss_edge = loss_src_edge

                    loss += loss_edge

                    loss_dict['target_edge'] = loss_tgt_edge.numpy()[0]
                    loss_dict['source_edge'] = loss_src_edge.numpy()[0]

                    del loss_edge, loss_tgt_edge, loss_src_edge

                #### target aug loss #######
                augs = augmentation.get_augmentation()
                images_tgt_aug, labels_tgt_aug = augmentation.augment(
                    images=images_tgt.cpu(),
                    labels=labels_tgt_psu.detach().cpu(),
                    aug=augs,
                    iters="{}_1".format(iter))
                images_tgt_aug = images_tgt_aug.cuda()
                labels_tgt_aug = labels_tgt_aug.cuda()

                _, labels_tgt_aug_aux = augmentation.augment(
                    images=images_tgt.cpu(),
                    labels=labels_tgt_psu_aux.detach().cpu(),
                    aug=augs,
                    iters="{}_2".format(iter))
                labels_tgt_aug_aux = labels_tgt_aug_aux.cuda()

                if nranks > 1:
                    logits_list_tgt_aug = ddp_model(images_tgt_aug)
                else:
                    logits_list_tgt_aug = self.model(images_tgt_aug)

                loss_tgt_aug_main = 0.1 * (self.celoss(logits_list_tgt_aug[0],
                                                       labels_tgt_aug))
                loss_tgt_aug_aux = 0.1 * (0.1 * self.celoss(
                    logits_list_tgt_aug[1], labels_tgt_aug_aux))

                loss_tgt_aug = loss_tgt_aug_aux + loss_tgt_aug_main

                loss += loss_tgt_aug

                loss_dict['target_aug_main'] = loss_tgt_aug_main.numpy()[0]
                loss_dict['target_aug_aux'] = loss_tgt_aug_aux.numpy()[0]
                del images_tgt_aug, labels_tgt_aug_aux, images_tgt, \
                    loss_tgt_aug, loss_tgt_aug_aux, loss_tgt_aug_main

                #### edge input seg; src & tgt edge pull in ######
                if self.edgepullin:
                    src_edge_logit = logits_list_src[2]
                    feat_src = paddle.concat(
                        [logits_list_src[0], src_edge_logit], axis=1).detach()

                    out_src = self.model.fusion(feat_src)
                    loss_src_edge_rec = self.celoss(out_src, labels_src)

                    tgt_edge_logit = logits_list_tgt_aug[2]
                    # tgt_edge_logit = paddle.to_tensor(
                    #     Func.mask_to_onehot(edges_tgt.squeeze().numpy(), 2)
                    #     ).unsqueeze(0).astype('float32')
                    feat_tgt = paddle.concat(
                        [logits_list_tgt[0], tgt_edge_logit], axis=1).detach()

                    out_tgt = self.model.fusion(feat_tgt)
                    loss_tgt_edge_rec = self.celoss(out_tgt, labels_tgt)

                    loss_edge_rec = loss_tgt_edge_rec + loss_src_edge_rec
                    loss += loss_edge_rec

                    loss_dict['src_edge_rec'] = loss_src_edge_rec.numpy()[0]
                    loss_dict['tgt_edge_rec'] = loss_tgt_edge_rec.numpy()[0]

                    del loss_tgt_edge_rec, loss_src_edge_rec

                #### mask input feature & pullin  ######
                if self.featurepullin:
                    # inner-class loss
                    feat_src = logits_list_src[0]
                    feat_tgt = logits_list_tgt_aug[0]
                    center_src_s, center_tgt_s = [], []

                    total_pixs = logits_list_src[0].shape[2] * \
                                    logits_list_src[0].shape[3]

                    for i in range(train_dataset_tgt.NUM_CLASSES):
                        pred = paddle.argmax(
                            logits_list_src[0].detach().clone(),
                            axis=1).unsqueeze(0)  # 1, 1, 640, 1280
                        sel_num = paddle.sum((pred == i).astype('float32'))
                        # ignore tensor that do not have features in this img
                        if sel_num > 0:
                            feat_sel_src = paddle.where(
                                (pred == i).expand_as(feat_src), feat_src,
                                paddle.zeros(feat_src.shape))
                            center_src = paddle.mean(feat_sel_src, axis=[
                                2, 3
                            ]) / (sel_num / total_pixs)  # 1, C

                            self.src_centers[i] = 0.99 * self.src_centers[
                                i] + (1 - 0.99) * center_src

                        pred = labels_tgt_aug.unsqueeze(0)  # 1, 1,  512, 512
                        sel_num = paddle.sum((pred == i).astype('float32'))
                        if sel_num > 0:
                            feat_sel_tgt = paddle.where(
                                (pred == i).expand_as(feat_tgt), feat_tgt,
                                paddle.zeros(feat_tgt.shape))
                            center_tgt = paddle.mean(feat_sel_tgt, axis=[
                                2, 3
                            ]) / (sel_num / total_pixs)

                            self.tgt_centers[i] = 0.99 * self.tgt_centers[
                                i] + (1 - 0.99) * center_tgt
                        center_src_s.append(center_src)
                        center_tgt_s.append(center_tgt)

                    if iter >= 3000:  # average center structure alignment
                        src_centers = paddle.concat(self.src_centers, axis=0)
                        tgt_centers = paddle.concat(self.tgt_centers,
                                                    axis=0)  # 19, 2048

                        relatmat_src = paddle.matmul(src_centers,
                                                     src_centers,
                                                     transpose_y=True)  # 19,19
                        relatmat_tgt = paddle.matmul(tgt_centers,
                                                     tgt_centers,
                                                     transpose_y=True)

                        loss_intra_relate = self.klloss(relatmat_src, (relatmat_tgt+relatmat_src)/2) \
                                            + self.klloss(relatmat_tgt, (relatmat_tgt+relatmat_src)/2)

                        loss_pix_align_src = self.mseloss(
                            paddle.to_tensor(center_src_s),
                            paddle.to_tensor(
                                self.src_centers).detach().clone())
                        loss_pix_align_tgt = self.mseloss(
                            paddle.to_tensor(center_tgt_s),
                            paddle.to_tensor(
                                self.tgt_centers).detach().clone())

                        loss_feat_align = loss_pix_align_src + loss_pix_align_tgt + loss_intra_relate

                        loss += loss_feat_align

                        loss_dict['loss_pix_align_src'] = \
                                loss_pix_align_src.numpy()[0]
                        loss_dict['loss_pix_align_tgt'] = \
                                loss_pix_align_tgt.numpy()[0]
                        loss_dict['loss_intra_relate'] = \
                                loss_intra_relate.numpy()[0]

                        del loss_pix_align_tgt, loss_pix_align_src, loss_intra_relate,

                    self.tgt_centers = [
                        item.detach().clone() for item in self.tgt_centers
                    ]
                    self.src_centers = [
                        item.detach().clone() for item in self.src_centers
                    ]

                loss.backward()
                del loss
                loss = sum(loss_dict.values())

                optimizer.step()
                self.ema.update_params()

                with paddle.no_grad():
                    ##### log & save #####
                    lr = optimizer.get_lr()
                    # update lr
                    if isinstance(optimizer, paddle.distributed.fleet.Fleet):
                        lr_sche = optimizer.user_defined_optimizer._learning_rate
                    else:
                        lr_sche = optimizer._learning_rate
                    if isinstance(lr_sche, paddle.optimizer.lr.LRScheduler):
                        lr_sche.step()

                    if self.cfg['save_edge']:
                        tgt_edge = paddle.argmax(
                            logits_list_tgt_aug[2].detach().clone(),
                            axis=1)  # 1, 1, 640,1280
                        src_feed_gt = paddle.argmax(
                            src_edge_logit.astype('float32'), axis=1)
                        tgt_feed_gt = paddle.argmax(
                            tgt_edge_logit.astype('float32'), axis=1)
                        logger.info('src_feed_gt_{}_{}_{}'.format(
                            src_feed_gt.shape, src_feed_gt.max(),
                            src_feed_gt.min()))
                        logger.info('tgt_feed_gt_{}_{}_{}'.format(
                            tgt_feed_gt.shape, max(tgt_feed_gt),
                            min(tgt_feed_gt)))
                        save_edge(src_feed_gt, 'src_feed_gt_{}'.format(iter))
                        save_edge(tgt_feed_gt, 'tgt_feed_gt_{}'.format(iter))
                        save_edge(tgt_edge, 'tgt_pred_{}'.format(iter))
                        save_edge(src_edge,
                                  'src_pred_{}_{}'.format(iter, src_edge_acc))
                        save_edge(edges_src, 'src_gt_{}'.format(iter))
                        save_edge(edges_tgt, 'tgt_gt_{}'.format(iter))

                    self.model.clear_gradients()

                    batch_cost_averager.record(time.time() - batch_start,
                                               num_samples=batch_size)

                    iter += 1
                    if (iter) % log_iters == 0 and local_rank == 0:
                        label_tgt_acc = ((labels_tgt == labels_tgt_psu).numpy().sum().astype('float32')\
                                            /functools.reduce(lambda a, b: a * b, labels_tgt_psu.shape))*100

                        remain_iters = iters - iter
                        avg_train_batch_cost = batch_cost_averager.get_average(
                        )
                        avg_train_reader_cost = reader_cost_averager.get_average(
                        )
                        eta = calculate_eta(remain_iters, avg_train_batch_cost)
                        logger.info(
                            "[TRAIN] epoch: {}, iter: {}/{}, loss: {:.4f}, tgt_pix_acc: {:.4f}, lr: {:.6f}, batch_cost: {:.4f}, reader_cost: {:.5f}, ips: {:.4f} samples/sec | ETA {}"
                            .format(
                                (iter - 1) // iters_per_epoch + 1, iter, iters,
                                loss, label_tgt_acc, lr, avg_train_batch_cost,
                                avg_train_reader_cost,
                                batch_cost_averager.get_ips_average(), eta))

                        if use_vdl:
                            log_writer.add_scalar('Train/loss', loss, iter)
                            # Record all losses if there are more than 2 losses.
                            if len(loss_dict) > 1:
                                for name, loss in loss_dict.items():
                                    log_writer.add_scalar(
                                        'Train/loss_' + name, loss, iter)

                            log_writer.add_scalar('Train/lr', lr, iter)
                            log_writer.add_scalar('Train/batch_cost',
                                                  avg_train_batch_cost, iter)
                            log_writer.add_scalar('Train/reader_cost',
                                                  avg_train_reader_cost, iter)
                            log_writer.add_scalar('Train/tgt_label_acc',
                                                  label_tgt_acc, iter)

                        reader_cost_averager.reset()
                        batch_cost_averager.reset()

                    if (iter % save_interval == 0 or iter
                            == iters) and (val_dataset_tgt is not None):
                        num_workers = 4 if num_workers > 0 else 0  # adjust num_worker=4

                        if test_config is None:
                            test_config = {}
                        self.ema.apply_shadow()
                        self.ema.model.eval()

                        PA_tgt, _, MIoU_tgt, _ = val.evaluate(
                            self.model,
                            val_dataset_tgt,
                            num_workers=num_workers,
                            **test_config)

                        if (iter % (save_interval * 30)) == 0 \
                            and self.cfg['eval_src']:  # add evaluate on src
                            PA_src, _, MIoU_src, _ = val.evaluate(
                                self.model,
                                val_dataset_src,
                                num_workers=num_workers,
                                **test_config)
                            logger.info(
                                '[EVAL] The source mIoU is ({:.4f}) at iter {}.'
                                .format(MIoU_src, iter))

                        self.ema.restore()
                        self.model.train()

                    if (iter % save_interval == 0
                            or iter == iters) and local_rank == 0:
                        current_save_dir = os.path.join(
                            save_dir, "iter_{}".format(iter))
                        if not os.path.isdir(current_save_dir):
                            os.makedirs(current_save_dir)
                        paddle.save(
                            self.model.state_dict(),
                            os.path.join(current_save_dir, 'model.pdparams'))
                        paddle.save(
                            self.ema.shadow,
                            os.path.join(current_save_dir,
                                         'model_ema.pdparams'))
                        paddle.save(
                            optimizer.state_dict(),
                            os.path.join(current_save_dir, 'model.pdopt'))
                        save_models.append(current_save_dir)
                        if len(save_models) > keep_checkpoint_max > 0:
                            model_to_remove = save_models.popleft()
                            shutil.rmtree(model_to_remove)

                        if val_dataset_tgt is not None:
                            if MIoU_tgt > best_mean_iou:
                                best_mean_iou = MIoU_tgt
                                best_model_iter = iter
                                best_model_dir = os.path.join(
                                    save_dir, "best_model")
                                paddle.save(
                                    self.model.state_dict(),
                                    os.path.join(best_model_dir,
                                                 'model.pdparams'))

                            logger.info(
                                '[EVAL] The model with the best validation mIoU ({:.4f}) was saved at iter {}.'
                                .format(best_mean_iou, best_model_iter))

                        if use_vdl:
                            log_writer.add_scalar('Evaluate/mIoU', MIoU_tgt,
                                                  iter)
                            log_writer.add_scalar('Evaluate/PA', PA_tgt, iter)

                            if self.cfg['eval_src']:
                                log_writer.add_scalar('Evaluate/mIoU_src',
                                                      MIoU_src, iter)
                                log_writer.add_scalar('Evaluate/PA_src',
                                                      PA_src, iter)

                    batch_start = time.time()

            self.ema.update_buffer()

        # # Calculate flops.
        if local_rank == 0:

            def count_syncbn(m, x, y):
                x = x[0]
                nelements = x.numel()
                m.total_ops += int(2 * nelements)

            _, c, h, w = images_src.shape
            flops = paddle.flops(
                self.model, [1, c, h, w],
                custom_ops={paddle.nn.SyncBatchNorm: count_syncbn})

        # Sleep for half a second to let dataloader release resources.
        time.sleep(0.5)
        if use_vdl:
            log_writer.close()
Esempio n. 10
0
def evaluate(model,
             eval_dataset,
             num_workers=0,
             is_view=False,
             save_dir='output',
             print_detail=True):
    """
    Launch evalution.

    Args:
        model(nn.Layer): A sementic segmentation model.
        eval_dataset (paddle.io.Dataset): Used to read and process validation datasets.
        num_workers (int, optional): Num workers for data loader. Default: 0.
        is_view (bool, optional): Whether to visualize results. Default: False.
        save_dir (str, optional): The directory to save the json or visualized results. Default: 'output'.
        print_detail (bool, optional): Whether to print detailed information about the evaluation process. Default: True.

    Returns:
        float: The acc of validation datasets.
        float: The fp of validation datasets.
        float: The fn of validation datasets.
    """
    model.eval()
    local_rank = paddle.distributed.ParallelEnv().local_rank

    loader = paddle.io.DataLoader(
        eval_dataset,
        batch_size=4,
        drop_last=False,
        num_workers=num_workers,
        return_list=True,
    )

    postprocessor = tusimple_processor.TusimpleProcessor(
        num_classes=eval_dataset.num_classes,
        cut_height=eval_dataset.cut_height,
        test_gt_json=eval_dataset.test_gt_json,
        save_dir=save_dir,
    )
    total_iters = len(loader)

    if print_detail:
        logger.info(
            "Start evaluating (total_samples: {}, total_iters: {})...".format(
                len(eval_dataset), total_iters))
    progbar_val = progbar.Progbar(target=total_iters, verbose=1)
    reader_cost_averager = TimeAverager()
    batch_cost_averager = TimeAverager()
    batch_start = time.time()
    with paddle.no_grad():
        for iter, (im, label, im_path) in enumerate(loader):
            reader_cost_averager.record(time.time() - batch_start)
            label = label.astype('int64')

            ori_shape = None

            time_start = time.time()
            pred = infer.inference(
                model,
                im,
                ori_shape=ori_shape,
                transforms=eval_dataset.transforms.transforms)
            time_end = time.time()

            postprocessor.dump_data_to_json(pred[1],
                                            im_path,
                                            run_time=time_end - time_start,
                                            is_dump_json=True,
                                            is_view=is_view)
            batch_cost_averager.record(time.time() - batch_start,
                                       num_samples=len(label))
            batch_cost = batch_cost_averager.get_average()
            reader_cost = reader_cost_averager.get_average()

            if local_rank == 0 and print_detail:
                progbar_val.update(iter + 1, [('batch_cost', batch_cost),
                                              ('reader cost', reader_cost)])
            reader_cost_averager.reset()
            batch_cost_averager.reset()
            batch_start = time.time()

    acc, fp, fn, eval_result = postprocessor.bench_one_submit(local_rank)

    if print_detail:
        logger.info(eval_result)
    return acc, fp, fn
Esempio n. 11
0
class Predictor:
    def __init__(self, args):
        self.cfg = DeployConfig(args.cfg)
        self.args = args
        self.compose = T.Compose(self.cfg.transforms)
        resize_h, resize_w = args.input_shape

        self.disflow = cv2.DISOpticalFlow_create(
            cv2.DISOPTICAL_FLOW_PRESET_ULTRAFAST)
        self.prev_gray = np.zeros((resize_h, resize_w), np.uint8)
        self.prev_cfd = np.zeros((resize_h, resize_w), np.float32)
        self.is_init = True

        pred_cfg = PredictConfig(self.cfg.model, self.cfg.params)
        pred_cfg.disable_glog_info()
        if self.args.use_gpu:
            pred_cfg.enable_use_gpu(100, 0)

        self.predictor = create_predictor(pred_cfg)
        if self.args.test_speed:
            self.cost_averager = TimeAverager()

    def preprocess(self, img):
        ori_shapes = []
        processed_imgs = []
        processed_img = self.compose(img)[0]
        processed_imgs.append(processed_img)
        ori_shapes.append(img.shape)
        return processed_imgs, ori_shapes

    def run(self, img, bg):
        input_names = self.predictor.get_input_names()
        input_handle = self.predictor.get_input_handle(input_names[0])

        processed_imgs, ori_shapes = self.preprocess(img)

        data = np.array(processed_imgs)
        input_handle.reshape(data.shape)
        input_handle.copy_from_cpu(data)
        if self.args.test_speed:
            start = time.time()

        self.predictor.run()

        if self.args.test_speed:
            self.cost_averager.record(time.time() - start)
        output_names = self.predictor.get_output_names()
        output_handle = self.predictor.get_output_handle(output_names[0])
        output = output_handle.copy_to_cpu()
        return self.postprocess(output, img, ori_shapes[0], bg)

    def postprocess(self, pred, img, ori_shape, bg):
        if not os.path.exists(self.args.save_dir):
            os.makedirs(self.args.save_dir)
        resize_w = pred.shape[-1]
        resize_h = pred.shape[-2]
        if self.args.soft_predict:
            if self.args.use_optic_flow:
                score_map = pred[:, 1, :, :].squeeze(0)
                score_map = 255 * score_map
                cur_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
                cur_gray = cv2.resize(cur_gray, (resize_w, resize_h))
                optflow_map = optic_flow_process(cur_gray, score_map, self.prev_gray, self.prev_cfd, \
                        self.disflow, self.is_init)
                self.prev_gray = cur_gray.copy()
                self.prev_cfd = optflow_map.copy()
                self.is_init = False

                score_map = np.repeat(optflow_map[:, :, np.newaxis], 3, axis=2)
                score_map = np.transpose(score_map, [2, 0, 1])[np.newaxis, ...]
                score_map = reverse_transform(paddle.to_tensor(score_map),
                                              ori_shape,
                                              self.cfg.transforms,
                                              mode='bilinear')
                alpha = np.transpose(score_map.numpy().squeeze(0),
                                     [1, 2, 0]) / 255
            else:
                score_map = pred[:, 1, :, :]
                score_map = score_map[np.newaxis, ...]
                score_map = reverse_transform(paddle.to_tensor(score_map),
                                              ori_shape,
                                              self.cfg.transforms,
                                              mode='bilinear')
                alpha = np.transpose(score_map.numpy().squeeze(0), [1, 2, 0])

        else:
            if pred.ndim == 3:
                pred = pred[:, np.newaxis, ...]
            result = reverse_transform(paddle.to_tensor(pred, dtype='float32'),
                                       ori_shape,
                                       self.cfg.transforms,
                                       mode='bilinear')

            result = np.array(result)
            if self.args.add_argmax:
                result = np.argmax(result, axis=1)
            else:
                result = result.squeeze(1)
            alpha = np.transpose(result, [1, 2, 0])

        # background replace
        h, w, _ = img.shape
        bg = cv2.resize(bg, (w, h))
        if bg.ndim == 2:
            bg = bg[..., np.newaxis]

        comb = (alpha * img + (1 - alpha) * bg).astype(np.uint8)
        return comb
Esempio n. 12
0
def evaluate(model,
             eval_dataset,
             threshold=0.1,
             nms_kernel=7,
             top_k=200,
             num_workers=0,
             print_detail=True):
    """
    Launch evaluation.

    Args:
        model(nn.Layer): A sementic segmentation model.
        eval_dataset (paddle.io.Dataset): Used to read and process validation datasets.
        threshold (float, optional): Threshold applied to center heatmap score. Defalut: 0.1.
        nms_kernel (int, optional): NMS max pooling kernel size. Default: 7.
        top_k (int, optional): Top k centers to keep. Default: 200.
        num_workers (int, optional): Num workers for data loader. Default: 0.
        print_detail (bool, optional): Whether to print detailed information about the evaluation process. Default: True.

    Returns:
        dict: Panoptic evaluation results which includes PQ, RQ, SQ for all, each class, Things and stuff.
        dict: Semantic evaluation results which includes mIoU, fwIoU, mACC and pACC.
        dict: Instance evaluation results which includes mAP and mAP50, and also AP and AP50 for each class.

    """
    model.eval()
    nranks = paddle.distributed.ParallelEnv().nranks
    local_rank = paddle.distributed.ParallelEnv().local_rank
    if nranks > 1:
        # Initialize parallel environment if not done.
        if not paddle.distributed.parallel.parallel_helper._is_parallel_ctx_initialized(
        ):
            paddle.distributed.init_parallel_env()
    batch_sampler = paddle.io.DistributedBatchSampler(eval_dataset,
                                                      batch_size=1,
                                                      shuffle=False,
                                                      drop_last=False)
    loader = paddle.io.DataLoader(
        eval_dataset,
        batch_sampler=batch_sampler,
        num_workers=num_workers,
        return_list=True,
    )

    total_iters = len(loader)
    semantic_metric = SemanticEvaluator(eval_dataset.num_classes,
                                        ignore_index=eval_dataset.ignore_index)
    instance_metric_AP50 = InstanceEvaluator(
        eval_dataset.num_classes,
        overlaps=0.5,
        thing_list=eval_dataset.thing_list)
    instance_metric_AP = InstanceEvaluator(eval_dataset.num_classes,
                                           overlaps=list(
                                               np.arange(0.5, 1.0, 0.05)),
                                           thing_list=eval_dataset.thing_list)
    panoptic_metric = PanopticEvaluator(
        num_classes=eval_dataset.num_classes,
        thing_list=eval_dataset.thing_list,
        ignore_index=eval_dataset.ignore_index,
        label_divisor=eval_dataset.label_divisor)

    if print_detail:
        logger.info(
            "Start evaluating (total_samples={}, total_iters={})...".format(
                len(eval_dataset), total_iters))
    progbar_val = progbar.Progbar(target=total_iters, verbose=1)
    reader_cost_averager = TimeAverager()
    batch_cost_averager = TimeAverager()
    batch_start = time.time()
    with paddle.no_grad():
        for iter, data in enumerate(loader):
            reader_cost_averager.record(time.time() - batch_start)
            im = data[0]
            raw_semantic_label = data[1]  # raw semantic label.
            raw_instance_label = data[2]
            raw_panoptic_label = data[3]
            ori_shape = raw_semantic_label.shape[-2:]

            semantic, semantic_softmax, instance, panoptic, ctr_hmp = infer.inference(
                model=model,
                im=im,
                transforms=eval_dataset.transforms.transforms,
                thing_list=eval_dataset.thing_list,
                label_divisor=eval_dataset.label_divisor,
                stuff_area=eval_dataset.stuff_area,
                ignore_index=eval_dataset.ignore_index,
                threshold=threshold,
                nms_kernel=nms_kernel,
                top_k=top_k,
                ori_shape=ori_shape)
            semantic = semantic.squeeze().numpy()
            semantic_softmax = semantic_softmax.squeeze().numpy()
            instance = instance.squeeze().numpy()
            panoptic = panoptic.squeeze().numpy()
            ctr_hmp = ctr_hmp.squeeze().numpy()
            raw_semantic_label = raw_semantic_label.squeeze().numpy()
            raw_instance_label = raw_instance_label.squeeze().numpy()
            raw_panoptic_label = raw_panoptic_label.squeeze().numpy()

            # update metric for semantic, instance, panoptic
            semantic_metric.update(semantic, raw_semantic_label)

            gts = instance_metric_AP.convert_gt_map(raw_semantic_label,
                                                    raw_instance_label)
            # print([i[0] for i in gts])
            preds = instance_metric_AP.convert_pred_map(
                semantic_softmax, panoptic)
            # print([(i[0], i[1]) for i in preds ])
            ignore_mask = raw_semantic_label == eval_dataset.ignore_index
            instance_metric_AP.update(preds, gts, ignore_mask=ignore_mask)
            instance_metric_AP50.update(preds, gts, ignore_mask=ignore_mask)

            panoptic_metric.update(panoptic, raw_panoptic_label)

            batch_cost_averager.record(time.time() - batch_start,
                                       num_samples=len(im))
            batch_cost = batch_cost_averager.get_average()
            reader_cost = reader_cost_averager.get_average()

            if local_rank == 0:
                progbar_val.update(iter + 1, [('batch_cost', batch_cost),
                                              ('reader cost', reader_cost)])
            reader_cost_averager.reset()
            batch_cost_averager.reset()
            batch_start = time.time()

    semantic_results = semantic_metric.evaluate()
    panoptic_results = panoptic_metric.evaluate()
    instance_results = OrderedDict()
    ins_ap = instance_metric_AP.evaluate()
    ins_ap50 = instance_metric_AP50.evaluate()
    instance_results['ins_seg'] = OrderedDict()
    instance_results['ins_seg']['mAP'] = ins_ap['ins_seg']['mAP']
    instance_results['ins_seg']['AP'] = ins_ap['ins_seg']['AP']
    instance_results['ins_seg']['mAP50'] = ins_ap50['ins_seg']['mAP']
    instance_results['ins_seg']['AP50'] = ins_ap50['ins_seg']['AP']

    if print_detail:
        logger.info(panoptic_results)
        print()
        logger.info(semantic_results)
        print()
        logger.info(instance_results)
        print()

        pq = panoptic_results['pan_seg']['All']['pq']
        miou = semantic_results['sem_seg']['mIoU']
        map = instance_results['ins_seg']['mAP']
        map50 = instance_results['ins_seg']['mAP50']
        logger.info(
            "PQ: {:.4f}, mIoU: {:.4f}, mAP: {:.4f}, mAP50: {:.4f}".format(
                pq, miou, map, map50))

    return panoptic_results, semantic_results, instance_results