Example #1
    def predict(self, test_data, batch_size=1, num_workers=0):
        """
        FIXME: add more comments and usage
        Args:
            test_data (Dataset|DataLoader): An iterable data loader is used for
                predict. An instance of paddle.fluid.io.Dataset or paddle.fluid.io.Dataloader 
                is recomended.
            batch_size (int): Integer number. The batch size of train_data and eval_data. 
                When train_data and eval_data are both the instance of Dataloader, this 
                parameter will be ignored.
            num_workers (int): the number of subprocess to load data, 0 for no subprocess 
                used and loading data in main process. When train_data and eval_data are
                both the instance of Dataloader, this parameter will be ignored.
        """

        if fluid.in_dygraph_mode():
            feed_list = None
        else:
            feed_list = [x.forward() for x in self._inputs + self._labels]

        if test_data is not None and isinstance(test_data, Dataset):
            test_sampler = DistributedBatchSampler(test_data,
                                                   batch_size=batch_size)
            test_loader = DataLoader(test_data,
                                     batch_sampler=test_sampler,
                                     places=self._place,
                                     feed_list=feed_list,
                                     num_workers=num_workers,
                                     return_list=True)
        else:
            test_loader = test_data

        self._test_dataloader = test_loader

        loader = test_loader
        if not isinstance(test_loader, Iterable):
            loader = test_loader()

        outputs = None
        for data in tqdm.tqdm(loader):
            if not fluid.in_dygraph_mode():
                data = data[0]

            outs = self.test(*data)

            if outputs is None:
                outputs = outs
            else:
                outputs = [
                    np.vstack([x, outs[i]]) for i, x in enumerate(outputs)
                ]

        self._test_dataloader = None
        if test_loader is not None and self._adapter._nranks > 1 \
                    and isinstance(test_loader, DataLoader):
            outputs = [o[:len(test_loader.dataset)] for o in outputs]
        return outputs
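A minimal usage sketch for the predict() method above (a hedged example, not part of the original source): model stands for an instance of the high-level model class that defines this method, and the toy RandomDataset below is a hypothetical stand-in for a real paddle.fluid.io.Dataset.

import numpy as np
from paddle.fluid.io import Dataset

class RandomDataset(Dataset):
    # Hypothetical toy dataset returning (feature, label) pairs.
    def __init__(self, count=32):
        self.count = count

    def __getitem__(self, idx):
        return (np.random.rand(10).astype('float32'),
                np.array([0], dtype='int64'))

    def __len__(self):
        return self.count

# Passing a Dataset lets predict() build its own DistributedBatchSampler and
# DataLoader internally; passing a ready-made DataLoader instead would bypass
# batch_size and num_workers, as documented above.
outputs = model.predict(RandomDataset(), batch_size=8, num_workers=0)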
Example #2
def main():
    global args, best_mIoU
    args = parser.parse_args()

    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    if args.dataset == 'LaneDet':
        num_class = 20
    else:
        raise ValueError('Unknown dataset ' + args.dataset)

    # get places
    places = fluid.cuda_places()

    with fluid.dygraph.guard():
        model = models.ERFNet(num_class, [576, 1024])
        input_mean = model.input_mean
        input_std = model.input_std

        if args.resume:
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint, _ = fluid.load_dygraph(args.resume)
            model.load_dict(checkpoint)
            print("=> checkpoint loaded successfully")
        else:
            print(("=> loading checkpoint '{}'".format('trained/ERFNet_trained')))
            checkpoint, _ = fluid.load_dygraph('trained/ERFNet_trained')
            model.load_dict(checkpoint)
            print("=> default checkpoint loaded successfully")

        # Data loading code
        test_dataset = ds.LaneDataSet(
            dataset_path='datasets/PreliminaryData',
            data_list=args.val_list,
            transform=[
                lambda x: cv2.resize(x, (1024, 576)),
                lambda x: (x - np.asarray(input_mean)[None, None, :]) / np.asarray(input_std)[None, None, :],  # subtract mean, then divide by std
            ]
        )

        test_loader = DataLoader(
            test_dataset,
            places=places[0],
            batch_size=1,
            shuffle=False,
            num_workers=args.workers,
            collate_fn=collate_fn
        )

        ### evaluate ###
        mIoU = validate(test_loader, model)
        # print('mIoU: {}'.format(mIoU))
    return
Example #3
def create_data_loader(args, device, for_train=True):
    data_loaders = [None, None]
    data_prefixes = [args.train_data_prefix, args.eval_data_prefix] \
        if args.eval_data_prefix else [args.train_data_prefix]
    for i, data_prefix in enumerate(data_prefixes):
        dataset = Seq2SeqDataset(
            fpattern=data_prefix + "." + args.src_lang,
            trg_fpattern=data_prefix + "." + args.tar_lang,
            src_vocab_fpath=args.vocab_prefix + "." + args.src_lang,
            trg_vocab_fpath=args.vocab_prefix + "." + args.tar_lang,
            token_delimiter=None,
            start_mark="<s>",
            end_mark="</s>",
            unk_mark="<unk>",
            max_length=args.max_len if i == 0 else None,
            truncate=True,
            trg_add_bos_eos=True)
        (args.src_vocab_size, args.tar_vocab_size, bos_id, eos_id,
         unk_id) = dataset.get_vocab_summary()
        batch_sampler = Seq2SeqBatchSampler(
            dataset=dataset,
            use_token_batch=False,
            batch_size=args.batch_size,
            pool_size=args.batch_size * 20,
            sort_type=SortType.POOL,
            shuffle=not args.enable_ce,
            distribute_mode=(i == 0))
        data_loader = DataLoader(
            dataset=dataset,
            batch_sampler=batch_sampler,
            places=device,
            collate_fn=partial(
                prepare_train_input,
                bos_id=bos_id,
                eos_id=eos_id,
                pad_id=eos_id),
            num_workers=0,
            return_list=True)
        data_loaders[i] = data_loader
    return data_loaders
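A hedged sketch of consuming the loaders returned by create_data_loader() above; args and device are assumed to carry the same fields the function already reads (train_data_prefix, eval_data_prefix, vocab_prefix, src_lang, tar_lang, batch_size, max_len, enable_ce).

# Usage sketch (assumption: args/device are configured as in the function above).
train_loader, eval_loader = create_data_loader(args, device, for_train=True)

for batch in train_loader:
    # With return_list=True, each batch is the list of fields produced by
    # prepare_train_input for that mini-batch.
    print([getattr(x, 'shape', None) for x in batch])
    break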
Example #4
def do_predict(args):
    device = paddle.set_device("gpu" if args.use_gpu else "cpu")
    if args.eager_run:
        fluid.enable_dygraph(device)

    # define model
    inputs = [
        Input([None, None], "int64", name="src_word"),
        Input([None], "int64", name="src_length"),
    ]

    # define the data loader
    dataset = Seq2SeqDataset(
        fpattern=args.infer_file,
        src_vocab_fpath=args.vocab_prefix + "." + args.src_lang,
        trg_vocab_fpath=args.vocab_prefix + "." + args.tar_lang,
        token_delimiter=None,
        start_mark="<s>",
        end_mark="</s>",
        unk_mark="<unk>")
    trg_idx2word = Seq2SeqDataset.load_dict(
        dict_path=args.vocab_prefix + "." + args.tar_lang, reverse=True)
    (args.src_vocab_size, args.trg_vocab_size, bos_id, eos_id,
     unk_id) = dataset.get_vocab_summary()
    batch_sampler = Seq2SeqBatchSampler(
        dataset=dataset, use_token_batch=False, batch_size=args.batch_size)
    data_loader = DataLoader(
        dataset=dataset,
        batch_sampler=batch_sampler,
        places=device,
        collate_fn=partial(
            prepare_infer_input, bos_id=bos_id, eos_id=eos_id, pad_id=eos_id),
        num_workers=0,
        return_list=True)

    model_maker = AttentionInferModel if args.attention else BaseInferModel
    model = paddle.Model(
        model_maker(
            args.src_vocab_size,
            args.tar_vocab_size,
            args.hidden_size,
            args.hidden_size,
            args.num_layers,
            args.dropout,
            bos_id=bos_id,
            eos_id=eos_id,
            beam_size=args.beam_size,
            max_out_len=256),
        inputs=inputs)

    model.prepare()

    # load the trained model
    assert args.reload_model, (
        "Please set reload_model to load the infer model.")
    model.load(args.reload_model)

    # TODO(guosheng): use model.predict when support variant length
    with io.open(args.infer_output_file, 'w', encoding='utf-8') as f:
        for data in data_loader():
            finished_seq = model.test_batch(inputs=flatten(data))[0]
            finished_seq = finished_seq[:, :, np.newaxis] if len(
                finished_seq.shape) == 2 else finished_seq
            finished_seq = np.transpose(finished_seq, [0, 2, 1])
            for ins in finished_seq:
                for beam_idx, beam in enumerate(ins):
                    id_list = post_process_seq(beam, bos_id, eos_id)
                    word_list = [trg_idx2word[id] for id in id_list]
                    sequence = " ".join(word_list) + "\n"
                    f.write(sequence)
                    break
Example #5
    def __getitem__(self, idx):
        image, label = self.images[idx].astype('uint8'), self.labels[idx]

        image = np.reshape(image, [28, 28, 1])
        if self.transform is not None:
            image = self.transform(image)
        # print(image.shape, label)
        return image, label

    def __len__(self):
        return len(self.labels)


if __name__ == '__main__':
    dataset = MNIST(root='./test', chw_format=True, mode='train')
    from paddle import fluid
    from paddle.fluid.io import DataLoader

    place = fluid.CPUPlace()
    fluid.enable_dygraph(place)
    loader = DataLoader(dataset,
                        places=place,
                        return_list=True,
                        batch_size=256,
                        shuffle=True,
                        num_workers=0)
    print(len(loader))
    itr = iter(loader)
    images, labels = next(itr)
    print(images.numpy().max(), images.numpy().min(), images.shape)
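As a variation on the snippet above, the same dataset can also be driven through an explicit batch sampler, which is the pattern most other examples on this page use; a sketch, assuming paddle.fluid.io.BatchSampler is available in this Paddle version.

from paddle.fluid.io import BatchSampler

# Sketch: replace batch_size/shuffle on the DataLoader with a BatchSampler.
sampler = BatchSampler(dataset, batch_size=256, shuffle=True, drop_last=False)
loader = DataLoader(dataset,
                    batch_sampler=sampler,
                    places=place,
                    return_list=True)
for images, labels in loader:
    print(images.shape, labels.shape)
    break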
Example #6
def train(model,
          train_dataset,
          places=None,
          eval_dataset=None,
          optimizer=None,
          save_dir='output',
          iters=10000,
          batch_size=2,
          resume_model=None,
          save_interval_iters=1000,
          log_iters=10,
          num_classes=None,
          num_workers=8,
          use_vdl=False):
    ignore_index = model.ignore_index
    nranks = ParallelEnv().nranks

    start_iter = 0
    if resume_model is not None:
        start_iter = resume(model, optimizer, resume_model)

    if not os.path.isdir(save_dir):
        if os.path.exists(save_dir):
            os.remove(save_dir)
        os.makedirs(save_dir)

    if nranks > 1:
        strategy = fluid.dygraph.prepare_context()
        ddp_model = fluid.dygraph.DataParallel(model, strategy)

    batch_sampler = DistributedBatchSampler(train_dataset,
                                            batch_size=batch_size,
                                            shuffle=True,
                                            drop_last=True)
    loader = DataLoader(
        train_dataset,
        batch_sampler=batch_sampler,
        places=places,
        num_workers=num_workers,
        return_list=True,
    )

    if use_vdl:
        from visualdl import LogWriter
        log_writer = LogWriter(save_dir)

    timer = Timer()
    avg_loss = 0.0
    iters_per_epoch = len(batch_sampler)
    best_mean_iou = -1.0
    best_model_iter = -1
    train_reader_cost = 0.0
    train_batch_cost = 0.0
    timer.start()

    iter = 0
    while iter < iters:
        for data in loader:
            iter += 1
            if iter > iters:
                break
            train_reader_cost += timer.elapsed_time()
            images = data[0]
            labels = data[1].astype('int64')
            if nranks > 1:
                loss = ddp_model(images, labels)
                # apply_collective_grads sums gradients over multiple GPUs.
                loss = ddp_model.scale_loss(loss)
                loss.backward()
                ddp_model.apply_collective_grads()
            else:
                loss = model(images, labels)
                loss.backward()
            optimizer.minimize(loss)
            model.clear_gradients()
            avg_loss += loss.numpy()[0]
            lr = optimizer.current_step_lr()
            train_batch_cost += timer.elapsed_time()
            if (iter) % log_iters == 0 and ParallelEnv().local_rank == 0:
                avg_loss /= log_iters
                avg_train_reader_cost = train_reader_cost / log_iters
                avg_train_batch_cost = train_batch_cost / log_iters
                train_reader_cost = 0.0
                train_batch_cost = 0.0
                remain_iters = iters - iter
                eta = calculate_eta(remain_iters, avg_train_batch_cost)
                logger.info(
                    "[TRAIN] epoch={}, iter={}/{}, loss={:.4f}, lr={:.6f}, batch_cost={:.4f}, reader_cost={:.4f} | ETA {}"
                    .format((iter - 1) // iters_per_epoch + 1, iter, iters,
                            avg_loss * nranks, lr, avg_train_batch_cost,
                            avg_train_reader_cost, eta))
                if use_vdl:
                    log_writer.add_scalar('Train/loss', avg_loss * nranks,
                                          iter)
                    log_writer.add_scalar('Train/lr', lr, iter)
                    log_writer.add_scalar('Train/batch_cost',
                                          avg_train_batch_cost, iter)
                    log_writer.add_scalar('Train/reader_cost',
                                          avg_train_reader_cost, iter)
                avg_loss = 0.0

            if (iter % save_interval_iters == 0
                    or iter == iters) and ParallelEnv().local_rank == 0:
                current_save_dir = os.path.join(save_dir,
                                                "iter_{}".format(iter))
                if not os.path.isdir(current_save_dir):
                    os.makedirs(current_save_dir)
                fluid.save_dygraph(model.state_dict(),
                                   os.path.join(current_save_dir, 'model'))
                fluid.save_dygraph(optimizer.state_dict(),
                                   os.path.join(current_save_dir, 'model'))

                if eval_dataset is not None:
                    mean_iou, avg_acc = evaluate(model,
                                                 eval_dataset,
                                                 model_dir=current_save_dir,
                                                 num_classes=num_classes,
                                                 ignore_index=ignore_index,
                                                 iter_id=iter)
                    if mean_iou > best_mean_iou:
                        best_mean_iou = mean_iou
                        best_model_iter = iter
                        best_model_dir = os.path.join(save_dir, "best_model")
                        fluid.save_dygraph(
                            model.state_dict(),
                            os.path.join(best_model_dir, 'model'))
                    logger.info(
                        'Current evaluated best model in eval_dataset is iter_{}, mIoU={:.4f}'
                        .format(best_model_iter, best_mean_iou))

                    if use_vdl:
                        log_writer.add_scalar('Evaluate/mIoU', mean_iou, iter)
                        log_writer.add_scalar('Evaluate/aAcc', avg_acc, iter)
                    model.train()
            timer.restart()
    if use_vdl:
        log_writer.close()
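A hedged invocation sketch for the train() function above; the model, datasets, and optimizer are placeholders for the segmentation model, datasets, and fluid optimizer this project defines elsewhere.

# Hypothetical call; model, train_dataset, val_dataset and optimizer come from
# the surrounding project and are not defined in this example.
places = fluid.cuda_places() if fluid.is_compiled_with_cuda() else fluid.cpu_places()
with fluid.dygraph.guard(places[0]):
    train(model,
          train_dataset,
          places=places,
          eval_dataset=val_dataset,
          optimizer=optimizer,
          save_dir='output',
          iters=10000,
          batch_size=2,
          num_workers=4,
          use_vdl=False)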
Example #7
    def fit(
        self,
        train_data=None,
        eval_data=None,
        batch_size=1,
        epochs=1,
        eval_freq=1,
        log_freq=10,
        save_dir=None,
        save_freq=1,
        verbose=2,
        drop_last=False,
        shuffle=True,
        num_workers=0,
        callbacks=None,
    ):
        """
        FIXME: add more comments and usage
        Args:
            train_data (Dataset|DataLoader): An iterable data loader is used for 
                train. An instance of paddle.fluid.io.Dataset or 
                paddle.fluid.io.Dataloader is recomended.
            eval_data (Dataset|DataLoader): An iterable data loader is used for
                evaluation at the end of epoch. If None, will not do evaluation. 
                An instance of paddle.fluid.io.Dataset or paddle.fluid.io.Dataloader 
                is recomended.
            batch_size (int): Integer number. The batch size of train_data and eval_data. 
                When train_data and eval_data are both the instance of Dataloader, this 
                parameter will be ignored.
            epochs (int): Integer number. The number of epochs to train the model.
            eval_freq (int): The frequency, in number of epochs, an evalutation
                is performed.
            log_freq (int): The frequency, in number of steps, the training logs
                are printed.
            save_dir(str|None): The directory to save checkpoint during training.
                If None, will not save checkpoint.
            save_freq (int): The frequency, in number of epochs, to save checkpoint.
            verbose (int): The verbosity mode, should be 0, 1, or 2.
                0 = silent, 1 = progress bar, 2 = one line per epoch.
            drop_last (bool): whether drop the last incomplete batch of train_data 
                when dataset size is not divisible by the batch size. When train_data 
                is an instance of Dataloader, this parameter will be ignored.
            shuffle (bool): whther to shuffle train_data. When train_data is an instance 
                of Dataloader, this parameter will be ignored.
            num_workers (int): the number of subprocess to load data, 0 for no subprocess 
                used and loading data in main process. When train_data and eval_data are
                both the instance of Dataloader, this parameter will be ignored.
            callbacks (Callback|None): A list of `Callback` instances to apply
                during training. If None, `ProgBarLogger` and `ModelCheckpoint`
                are automatically inserted.
        """

        assert train_data is not None, \
                "train_data must be given!"

        if fluid.in_dygraph_mode():
            feed_list = None
        else:
            feed_list = [x.forward() for x in self._inputs + self._labels]

        if isinstance(train_data, Dataset):
            train_sampler = DistributedBatchSampler(train_data,
                                                    batch_size=batch_size,
                                                    shuffle=shuffle,
                                                    drop_last=drop_last)
            train_loader = DataLoader(train_data,
                                      batch_sampler=train_sampler,
                                      places=self._place,
                                      feed_list=feed_list,
                                      num_workers=num_workers,
                                      return_list=True)
        else:
            train_loader = train_data

        if eval_data is not None and isinstance(eval_data, Dataset):
            eval_sampler = DistributedBatchSampler(eval_data,
                                                   batch_size=batch_size)
            eval_loader = DataLoader(eval_data,
                                     batch_sampler=eval_sampler,
                                     places=self._place,
                                     feed_list=feed_list,
                                     num_workers=num_workers,
                                     return_list=True)
        elif eval_data is not None:
            eval_loader = eval_data
        else:
            eval_loader = None

        do_eval = eval_loader is not None
        self._test_dataloader = eval_loader
        metrics_name = self._metrics_name()
        steps = len(train_loader) if hasattr(train_loader, '__len__') else None
        cbks = config_callbacks(
            callbacks,
            model=self,
            epochs=epochs,
            steps=steps,
            log_freq=log_freq,
            save_freq=save_freq,
            save_dir=save_dir,
            verbose=verbose,
            metrics=self._metrics_name(),
        )

        cbks.on_begin('train')
        for epoch in range(epochs):

            # FIXME: adapt to DataLoader
            loader = train_loader
            if not isinstance(train_loader, Iterable):
                loader = train_loader()
            logs = self._run_one_epoch(loader,
                                       cbks,
                                       'train',
                                       metrics_name,
                                       epoch=epoch)

            if do_eval and epoch % eval_freq == 0:
                # FIXME: adapt to DataLoader
                loader = eval_loader
                if not isinstance(eval_loader, Iterable):
                    loader = eval_loader()

                eval_steps = len(loader) if hasattr(loader,
                                                    '__len__') else None
                cbks.on_begin('eval', {
                    'steps': eval_steps,
                    'metrics_name': metrics_name
                })

                logs = self._run_one_epoch(loader, cbks, 'eval', metrics_name)

                cbks.on_end('eval', logs)

        cbks.on_end('train', logs)
        self._test_dataloader = None
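A hedged usage sketch for fit() above: plain Dataset objects are passed so the method constructs its own DistributedBatchSampler and DataLoader pair. model, train_dataset, and val_dataset are placeholders, and the model is assumed to have been prepared (optimizer, loss, metrics) beforehand.

# Hypothetical call; the objects below come from the surrounding project.
model.fit(train_data=train_dataset,
          eval_data=val_dataset,
          batch_size=64,
          epochs=5,
          eval_freq=1,
          log_freq=10,
          save_dir='checkpoints',
          shuffle=True,
          num_workers=2)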
Example #8
    def evaluate(
        self,
        eval_data,
        batch_size=1,
        log_freq=10,
        verbose=2,
        num_workers=0,
        callbacks=None,
    ):
        """
        FIXME: add more comments and usage
        Args:
            eval_data (Dataset|DataLoader): An iterable data loader is used for
                evaluation. An instance of paddle.fluid.io.Dataset or 
                paddle.fluid.io.Dataloader is recomended.
            batch_size (int): Integer number. The batch size of train_data and eval_data. 
                When train_data and eval_data are both the instance of Dataloader, this 
                parameter will be ignored.
            log_freq (int): The frequency, in number of steps, the eval logs
                are printed.
            verbose (int): The verbosity mode, should be 0, 1, or 2.
                0 = silent, 1 = progress bar, 2 = one line per epoch.
            num_workers (int): The number of subprocess to load data, 0 for no subprocess 
                used and loading data in main process. When train_data and eval_data are
                both the instance of Dataloader, this parameter will be ignored.
            callbacks (Callback|None): A list of `Callback` instances to apply
                during training. If None, `ProgBarLogger` and `ModelCheckpoint`
                are automatically inserted.
        """

        if fluid.in_dygraph_mode():
            feed_list = None
        else:
            feed_list = [x.forward() for x in self._inputs + self._labels]

        if eval_data is not None and isinstance(eval_data, Dataset):
            eval_sampler = DistributedBatchSampler(eval_data,
                                                   batch_size=batch_size)
            eval_loader = DataLoader(eval_data,
                                     batch_sampler=eval_sampler,
                                     places=self._place,
                                     feed_list=feed_list,
                                     num_workers=num_workers,
                                     return_list=True)
        else:
            eval_loader = eval_data

        self._test_dataloader = eval_loader
        metrics_name = self._metrics_name()

        cbks = config_callbacks(
            callbacks,
            model=self,
            log_freq=log_freq,
            verbose=verbose,
            metrics=self._metrics_name(),
        )

        loader = eval_loader
        if not isinstance(eval_loader, Iterable):
            loader = eval_loader()

        eval_steps = len(loader) if hasattr(loader, '__len__') else None
        cbks.on_begin('eval', {
            'steps': eval_steps,
            'metrics_name': metrics_name
        })

        logs = self._run_one_epoch(loader, cbks, 'eval', metrics_name)

        cbks.on_end('eval', logs)

        self._test_dataloader = None

        eval_result = {}
        for k in self._metrics_name():
            eval_result[k] = logs[k]

        return eval_result
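A hedged sketch of calling evaluate() above; it returns a dict keyed by the configured metric names, and model / val_dataset are placeholders for the project's own objects.

# Hypothetical call; the exact keys of the result depend on the metrics that
# were configured on the model.
eval_result = model.evaluate(val_dataset,
                             batch_size=64,
                             log_freq=10,
                             verbose=2,
                             num_workers=2)
print(eval_result)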
Example #9
def main():
    global best_mIoU, start_epoch

    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

    if args.dataset == 'LaneDet':
        num_class = 20
        ignore_label = 255
    else:
        raise ValueError('Unknown dataset ' + args.dataset)

    # get places
    places = fluid.cuda_places()

    with fluid.dygraph.guard():
        model = models.ERFNet(num_class, [args.img_height, args.img_width])
        input_mean = model.input_mean
        input_std = model.input_std

        # Data loading code
        train_dataset = ds.LaneDataSet(
            dataset_path='datasets/PreliminaryData',
            data_list=args.train_list,
            transform=[
                tf.GroupRandomScale(size=(int(args.img_width), int(args.img_width * 1.2)),
                                    interpolation=(cv2.INTER_LINEAR, cv2.INTER_NEAREST)),
                tf.GroupRandomCropRatio(size=(args.img_width, args.img_height)),
                tf.GroupNormalize(mean=(input_mean, (0,)), std=(input_std, (1,))),
            ]
        )

        train_loader = DataLoader(
            train_dataset,
            places=places[0],
            batch_size=args.batch_size,
            shuffle=True,
            num_workers=args.workers,
            drop_last=True
        )

        val_dataset = ds.LaneDataSet(
            dataset_path='datasets/PreliminaryData',
            data_list=args.train_list,
            transform=[
                tf.GroupRandomScale(size=args.img_width, interpolation=(cv2.INTER_LINEAR, cv2.INTER_NEAREST)),
                tf.GroupNormalize(mean=(input_mean, (0,)), std=(input_std, (1,))),
            ],
            is_val=False
        )

        val_loader = DataLoader(
            val_dataset,
            places=places[0],
            batch_size=1,
            shuffle=False,
            num_workers=args.workers,
        )

        # define loss function (criterion) optimizer and evaluator
        weights = [1.0 for _ in range(num_class)]
        weights[0] = 0.25
        weights = fluid.dygraph.to_variable(np.array(weights, dtype=np.float32))
        criterion = fluid.dygraph.NLLLoss(weight=weights, ignore_index=ignore_label)
        evaluator = EvalSegmentation(num_class, ignore_label)

        optimizer = fluid.optimizer.MomentumOptimizer(learning_rate=fluid.dygraph.CosineDecay(
                                                                    args.lr, len(train_loader), args.epochs),
                                                      momentum=args.momentum,
                                                      parameter_list=model.parameters(),
                                                      regularization=fluid.regularizer.L2Decay(
                                                          regularization_coeff=args.weight_decay))

        if args.resume:
            print(("=> loading checkpoint '{}'".format(args.resume)))
            start_epoch = int(''.join([x for x in args.resume.split('/')[-1] if x.isdigit()]))
            checkpoint, optim_checkpoint = fluid.load_dygraph(args.resume)
            model.load_dict(checkpoint)
            optimizer.set_dict(optim_checkpoint)
            print(("=> loaded checkpoint (epoch {})".format(start_epoch)))
        else:
            try:
                checkpoint, _ = fluid.load_dygraph(args.weight)
                model.load_dict(checkpoint)
                print("=> pretrained model loaded successfully")
            except Exception:
                print("=> no pretrained model found at '{}'".format(args.weight))

        for epoch in range(start_epoch, args.epochs):
            # train for one epoch
            loss = train(train_loader, model, criterion, optimizer, epoch)

            # writer.add_scalar('lr', optimizer.current_step_lr(), epoch + 1)

            if (epoch + 1) % args.save_freq == 0 or epoch == args.epochs - 1:
                save_checkpoint(model.state_dict(), epoch)
                save_checkpoint(optimizer.state_dict(), epoch)

            # evaluate on validation set
            if (epoch + 1) % args.eval_freq == 0 or epoch == args.epochs - 1:
                mIoU = validate(val_loader, model, evaluator, epoch)

                # remember best mIoU
                is_best = mIoU > best_mIoU
                best_mIoU = max(mIoU, best_mIoU)
                if is_best:
                    tag_best(epoch, best_mIoU)
Example #10
def train(model,
          train_dataset,
          places=None,
          eval_dataset=None,
          optimizer=None,
          save_dir='output',
          num_epochs=100,
          batch_size=2,
          pretrained_model=None,
          resume_model=None,
          save_interval_epochs=1,
          log_steps=10,
          num_classes=None,
          num_workers=8,
          use_vdl=False):
    ignore_index = model.ignore_index
    nranks = ParallelEnv().nranks

    start_epoch = 0
    if resume_model is not None:
        start_epoch = resume(model, optimizer, resume_model)
    elif pretrained_model is not None:
        load_pretrained_model(model, pretrained_model)

    if not os.path.isdir(save_dir):
        if os.path.exists(save_dir):
            os.remove(save_dir)
        os.makedirs(save_dir)

    if nranks > 1:
        strategy = fluid.dygraph.prepare_context()
        ddp_model = fluid.dygraph.DataParallel(model, strategy)

    batch_sampler = DistributedBatchSampler(train_dataset,
                                            batch_size=batch_size,
                                            shuffle=True,
                                            drop_last=True)
    loader = DataLoader(
        train_dataset,
        batch_sampler=batch_sampler,
        places=places,
        num_workers=num_workers,
        return_list=True,
    )

    if use_vdl:
        from visualdl import LogWriter
        log_writer = LogWriter(save_dir)

    timer = Timer()
    avg_loss = 0.0
    steps_per_epoch = len(batch_sampler)
    total_steps = steps_per_epoch * (num_epochs - start_epoch)
    num_steps = 0
    best_mean_iou = -1.0
    best_model_epoch = -1
    train_reader_cost = 0.0
    train_batch_cost = 0.0
    for epoch in range(start_epoch, num_epochs):
        timer.start()
        for step, data in enumerate(loader):
            train_reader_cost += timer.elapsed_time()
            images = data[0]
            labels = data[1].astype('int64')
            if nranks > 1:
                loss = ddp_model(images, labels)
                # apply_collective_grads sums gradients over multiple GPUs.
                loss = ddp_model.scale_loss(loss)
                loss.backward()
                ddp_model.apply_collective_grads()
            else:
                loss = model(images, labels)
                loss.backward()
            optimizer.minimize(loss)
            model.clear_gradients()
            avg_loss += loss.numpy()[0]
            lr = optimizer.current_step_lr()
            num_steps += 1
            train_batch_cost += timer.elapsed_time()
            if num_steps % log_steps == 0 and ParallelEnv().local_rank == 0:
                avg_loss /= log_steps
                avg_train_reader_cost = train_reader_cost / log_steps
                avg_train_batch_cost = train_batch_cost / log_steps
                train_reader_cost = 0.0
                train_batch_cost = 0.0
                remain_steps = total_steps - num_steps
                eta = calculate_eta(remain_steps, avg_train_batch_cost)
                logging.info(
                    "[TRAIN] Epoch={}/{}, Step={}/{}, loss={:.4f}, lr={:.6f}, batch_cost={:.4f}, reader_cost={:.4f} | ETA {}"
                    .format(epoch + 1, num_epochs, step + 1, steps_per_epoch,
                            avg_loss * nranks, lr, avg_train_batch_cost,
                            avg_train_reader_cost, eta))
                if use_vdl:
                    log_writer.add_scalar('Train/loss', avg_loss * nranks,
                                          num_steps)
                    log_writer.add_scalar('Train/lr', lr, num_steps)
                    log_writer.add_scalar('Train/batch_cost',
                                          avg_train_batch_cost, num_steps)
                    log_writer.add_scalar('Train/reader_cost',
                                          avg_train_reader_cost, num_steps)
                avg_loss = 0.0
            timer.restart()

        if ((epoch + 1) % save_interval_epochs == 0
                or epoch + 1 == num_epochs) and ParallelEnv().local_rank == 0:
            current_save_dir = os.path.join(save_dir,
                                            "epoch_{}".format(epoch + 1))
            if not os.path.isdir(current_save_dir):
                os.makedirs(current_save_dir)
            fluid.save_dygraph(model.state_dict(),
                               os.path.join(current_save_dir, 'model'))
            fluid.save_dygraph(optimizer.state_dict(),
                               os.path.join(current_save_dir, 'model'))

            if eval_dataset is not None:
                mean_iou, avg_acc = evaluate(model,
                                             eval_dataset,
                                             model_dir=current_save_dir,
                                             num_classes=num_classes,
                                             ignore_index=ignore_index,
                                             epoch_id=epoch + 1)
                if mean_iou > best_mean_iou:
                    best_mean_iou = mean_iou
                    best_model_epoch = epoch + 1
                    best_model_dir = os.path.join(save_dir, "best_model")
                    fluid.save_dygraph(model.state_dict(),
                                       os.path.join(best_model_dir, 'model'))
                logging.info(
                    'Current evaluated best model in eval_dataset is epoch_{}, mIoU={:.4f}'
                    .format(best_model_epoch, best_mean_iou))

                if use_vdl:
                    log_writer.add_scalar('Evaluate/mIoU', mean_iou, epoch + 1)
                    log_writer.add_scalar('Evaluate/aAcc', avg_acc, epoch + 1)
                model.train()
    if use_vdl:
        log_writer.close()