Example #1
def visualize(cfg,
              vis_file_list=None,
              use_gpu=False,
              vis_dir="visual",
              also_save_raw_results=False,
              ckpt_dir=None,
              log_writer=None,
              local_test=False,
              **kwargs):
    if vis_file_list is None:
        vis_file_list = cfg.DATASET.TEST_FILE_LIST
    dataset = SegDataset(file_list=vis_file_list,
                         mode=ModelPhase.VISUAL,
                         data_dir=cfg.DATASET.DATA_DIR)

    startup_prog = fluid.Program()
    test_prog = fluid.Program()
    pred, logit = build_model(test_prog, startup_prog, phase=ModelPhase.VISUAL)
    # Clone forward graph
    test_prog = test_prog.clone(for_test=True)

    # Generate the full color map for a maximum of 256 classes
    color_map = get_color_map(256)

    # Get device environment
    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_prog)

    ckpt_dir = cfg.TEST.TEST_MODEL if not ckpt_dir else ckpt_dir

    fluid.io.load_params(exe, ckpt_dir, main_program=test_prog)

    save_dir = os.path.join(vis_dir, 'visual_results')
    makedirs(save_dir)
    if also_save_raw_results:
        raw_save_dir = os.path.join(vis_dir, 'raw_results')
        makedirs(raw_save_dir)

    fetch_list = [pred.name]
    test_reader = dataset.batch(dataset.generator, batch_size=1, is_test=True)
    img_cnt = 0
    for imgs, grts, img_names, valid_shapes, org_shapes in test_reader:
        pred_shape = (imgs.shape[2], imgs.shape[3])
        pred, = exe.run(program=test_prog,
                        feed={'image': imgs},
                        fetch_list=fetch_list,
                        return_numpy=True)

        num_imgs = pred.shape[0]
        # TODO: use multi-thread to write images
        for i in range(num_imgs):
            # Squeeze the prediction for this image into a 2-D uint8 label map
            res_map = np.squeeze(pred[i, :, :, :]).astype(np.uint8)
            img_name = img_names[i]
            grt = grts[i]
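            # The raw network output is restored to image space in three steps:
            # 1) resize the label map back to the padded input size if needed,
            # 2) crop away the padding using valid_shape, and
            # 3) resize to the original image size (org_shape) with
            #    nearest-neighbor interpolation so class ids stay intact.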
            res_shape = (res_map.shape[0], res_map.shape[1])
            if res_shape[0] != pred_shape[0] or res_shape[1] != pred_shape[1]:
                res_map = cv2.resize(res_map,
                                     pred_shape,
                                     interpolation=cv2.INTER_NEAREST)
            valid_shape = (valid_shapes[i, 0], valid_shapes[i, 1])
            res_map = res_map[0:valid_shape[0], 0:valid_shape[1]]
            org_shape = (org_shapes[i, 0], org_shapes[i, 1])
            res_map = cv2.resize(res_map, (org_shape[1], org_shape[0]),
                                 interpolation=cv2.INTER_NEAREST)

            if grt is not None:
                grt = grt[0:valid_shape[0], 0:valid_shape[1]]
                grt = cv2.resize(grt, (org_shape[1], org_shape[0]),
                                 interpolation=cv2.INTER_NEAREST)

            png_fn = to_png_fn(img_names[i])
            if also_save_raw_results:
                raw_fn = os.path.join(raw_save_dir, png_fn)
                dirname = os.path.dirname(raw_fn)
                makedirs(dirname)
                cv2.imwrite(raw_fn, res_map)

            # Colorized segmentation result visualization
            vis_fn = os.path.join(save_dir, png_fn)
            dirname = os.path.dirname(vis_fn)
            makedirs(dirname)

            pred_mask = colorize(res_map, org_shapes[i], color_map)
            if grt is not None:
                grt = colorize(grt, org_shapes[i], color_map)
            cv2.imwrite(vis_fn, pred_mask)

            img_cnt += 1
            print("#{} visualize image path: {}".format(img_cnt, vis_fn))

            # Use Tensorboard to visualize image
            if log_writer is not None:
                # Calculate the epoch from the ckpt_dir folder name
                epoch = int(os.path.split(ckpt_dir)[-1])
                print("Tensorboard visualization epoch", epoch)
                log_writer.add_image("Predict/{}".format(img_names[i]),
                                     pred_mask[..., ::-1],
                                     epoch,
                                     dataformats='HWC')
                # Original image
                # BGR->RGB
                img = cv2.imread(
                    os.path.join(cfg.DATASET.DATA_DIR,
                                 img_names[i]))[..., ::-1]
                log_writer.add_image("Images/{}".format(img_names[i]),
                                     img,
                                     epoch,
                                     dataformats='HWC')
                # Add ground truth (label) images
                if grt is not None:
                    log_writer.add_image("Label/{}".format(img_names[i]),
                                         grt[..., ::-1],
                                         epoch,
                                         dataformats='HWC')

        # In local_test mode, only visualize 5 images, just to exercise the
        # procedure
        if local_test and img_cnt >= 5:
            break
Example #2
def train(cfg):
    startup_prog = fluid.Program()
    train_prog = fluid.Program()
    drop_last = True

    dataset = SegDataset(file_list=cfg.DATASET.TRAIN_FILE_LIST,
                         mode=ModelPhase.TRAIN,
                         shuffle=True,
                         data_dir=cfg.DATASET.DATA_DIR)

    def data_generator():
        if args.use_mpio:
            data_gen = dataset.multiprocess_generator(
                num_processes=cfg.DATALOADER.NUM_WORKERS,
                max_queue_size=cfg.DATALOADER.BUF_SIZE)
        else:
            data_gen = dataset.generator()

        batch_data = []
        for b in data_gen:
            batch_data.append(b)
            if len(batch_data) == (cfg.BATCH_SIZE // cfg.NUM_TRAINERS):
                for item in batch_data:
                    yield item[0], item[1], item[2]
                batch_data = []
        # If the sync batch norm strategy is used, drop the last batch when the
        # number of samples in batch_data is less than cfg.BATCH_SIZE, to avoid
        # NCCL hang issues
        if not cfg.TRAIN.SYNC_BATCH_NORM:
            for item in batch_data:
                yield item[0], item[1], item[2]

    # Get device environment
    # places = fluid.cuda_places() if args.use_gpu else fluid.cpu_places()
    # place = places[0]
    gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
    place = fluid.CUDAPlace(gpu_id) if args.use_gpu else fluid.CPUPlace()
    places = fluid.cuda_places() if args.use_gpu else fluid.cpu_places()

    # Get the number of GPUs
    dev_count = cfg.NUM_TRAINERS if cfg.NUM_TRAINERS > 1 else len(places)
    print_info("#Device count: {}".format(dev_count))

    # Make sure BATCH_SIZE is divisible by the number of GPU cards
    assert cfg.BATCH_SIZE % dev_count == 0, (
        'BATCH_SIZE:{} not divisible by number of GPUs:{}'.format(
            cfg.BATCH_SIZE, dev_count))
    # In multi-GPU training mode, batch data is allocated to each GPU evenly
    batch_size_per_dev = cfg.BATCH_SIZE // dev_count
    print_info("batch_size_per_dev: {}".format(batch_size_per_dev))

    py_reader, avg_loss, lr, pred, grts, masks = build_model(
        train_prog, startup_prog, phase=ModelPhase.TRAIN)
    py_reader.decorate_sample_generator(data_generator,
                                        batch_size=batch_size_per_dev,
                                        drop_last=drop_last)

    exe = fluid.Executor(place)
    exe.run(startup_prog)

    exec_strategy = fluid.ExecutionStrategy()
    # Clear temporary variables every 100 iterations
    if args.use_gpu:
        exec_strategy.num_threads = fluid.core.get_cuda_device_count()
    exec_strategy.num_iteration_per_drop_scope = 100
    build_strategy = fluid.BuildStrategy()

    if cfg.NUM_TRAINERS > 1 and args.use_gpu:
        dist_utils.prepare_for_multi_process(exe, build_strategy, train_prog)
        exec_strategy.num_threads = 1

    if cfg.TRAIN.SYNC_BATCH_NORM and args.use_gpu:
        if dev_count > 1:
            # Apply sync batch norm strategy
            print_info("Sync BatchNorm strategy is effective.")
            build_strategy.sync_batch_norm = True
        else:
            print_info(
                "Sync BatchNorm strategy will not be effective if GPU device"
                " count <= 1")
    compiled_train_prog = fluid.CompiledProgram(train_prog).with_data_parallel(
        loss_name=avg_loss.name,
        exec_strategy=exec_strategy,
        build_strategy=build_strategy)

    # Resume training
    begin_epoch = cfg.SOLVER.BEGIN_EPOCH
    if cfg.TRAIN.RESUME_MODEL_DIR:
        begin_epoch = load_checkpoint(exe, train_prog)
    # Load pretrained model
    elif os.path.exists(cfg.TRAIN.PRETRAINED_MODEL_DIR):
        print_info('Pretrained model dir: ', cfg.TRAIN.PRETRAINED_MODEL_DIR)
        load_vars = []
        load_fail_vars = []

        def var_shape_matched(var, shape):
            """
            Check whether the persistable variable's shape matches the current network
            """
            var_exist = os.path.exists(
                os.path.join(cfg.TRAIN.PRETRAINED_MODEL_DIR, var.name))
            if var_exist:
                var_shape = parse_shape_from_file(
                    os.path.join(cfg.TRAIN.PRETRAINED_MODEL_DIR, var.name))
                return var_shape == shape
            return False
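
        # Only parameters whose saved file exists and whose shape matches the
        # current network are loaded below; a typical mismatch (an assumed,
        # common case rather than one stated here) is the classification head
        # when NUM_CLASSES differs from the pretrained model.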

        for x in train_prog.list_vars():
            if isinstance(x, fluid.framework.Parameter):
                shape = tuple(fluid.global_scope().find_var(
                    x.name).get_tensor().shape())
                if var_shape_matched(x, shape):
                    load_vars.append(x)
                else:
                    load_fail_vars.append(x)

        fluid.io.load_vars(exe,
                           dirname=cfg.TRAIN.PRETRAINED_MODEL_DIR,
                           vars=load_vars)
        for var in load_vars:
            print_info("Parameter[{}] loaded sucessfully!".format(var.name))
        for var in load_fail_vars:
            print_info(
                "Parameter[{}] don't exist or shape does not match current network, skip"
                " to load it.".format(var.name))
        print_info("{}/{} pretrained parameters loaded successfully!".format(
            len(load_vars),
            len(load_vars) + len(load_fail_vars)))
    else:
        print_info(
            'Pretrained model dir {} does not exist, training from scratch...'.
            format(cfg.TRAIN.PRETRAINED_MODEL_DIR))

    fetch_list = [avg_loss.name, lr.name]
    if args.debug:
        # Fetch more variable info and use streaming confusion matrix to
        # calculate IoU results if in debug mode
        np.set_printoptions(precision=4,
                            suppress=True,
                            linewidth=160,
                            floatmode="fixed")
        fetch_list.extend([pred.name, grts.name, masks.name])
        cm = ConfusionMatrix(cfg.DATASET.NUM_CLASSES, streaming=True)

    if args.use_tb:
        if not args.tb_log_dir:
            print_info("Please specify the log directory by --tb_log_dir.")
            exit(1)

        from tb_paddle import SummaryWriter
        log_writer = SummaryWriter(args.tb_log_dir)

    # trainer_id = int(os.getenv("PADDLE_TRAINER_ID", 0))
    # num_trainers = int(os.environ.get('PADDLE_TRAINERS_NUM', 1))
    global_step = 0
    all_step = cfg.DATASET.TRAIN_TOTAL_IMAGES // cfg.BATCH_SIZE
    if cfg.DATASET.TRAIN_TOTAL_IMAGES % cfg.BATCH_SIZE and not drop_last:
        all_step += 1
    all_step *= (cfg.SOLVER.NUM_EPOCHS - begin_epoch + 1)
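    # Step-count sketch with hypothetical numbers: TRAIN_TOTAL_IMAGES=1000 and
    # BATCH_SIZE=8 with drop_last=True give 1000 // 8 = 125 steps per epoch,
    # multiplied by the number of remaining epochs for the ETA estimate.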

    avg_loss = 0.0
    timer = Timer()
    timer.start()
    if begin_epoch > cfg.SOLVER.NUM_EPOCHS:
        raise ValueError((
            "begin epoch[{}] is larger than cfg.SOLVER.NUM_EPOCHS[{}]").format(
                begin_epoch, cfg.SOLVER.NUM_EPOCHS))

    if args.use_mpio:
        print_info("Use multiprocess reader")
    else:
        print_info("Use multi-thread reader")

    for epoch in range(begin_epoch, cfg.SOLVER.NUM_EPOCHS + 1):
        py_reader.start()
        while True:
            try:
                if args.debug:
                    # Print category IoU and accuracy to check whether the
                    # training process behaves as expected
                    loss, lr, pred, grts, masks = exe.run(
                        program=compiled_train_prog,
                        fetch_list=fetch_list,
                        return_numpy=True)
                    cm.calculate(pred, grts, masks)
                    avg_loss += np.mean(np.array(loss))
                    global_step += 1

                    if global_step % args.log_steps == 0:
                        speed = args.log_steps / timer.elapsed_time()
                        avg_loss /= args.log_steps
                        category_acc, mean_acc = cm.accuracy()
                        category_iou, mean_iou = cm.mean_iou()

                        print_info((
                            "epoch={} step={} lr={:.5f} loss={:.4f} acc={:.5f} mIoU={:.5f} step/sec={:.3f} | ETA {}"
                        ).format(epoch, global_step, lr[0], avg_loss, mean_acc,
                                 mean_iou, speed,
                                 calculate_eta(all_step - global_step, speed)))
                        print_info("Category IoU: ", category_iou)
                        print_info("Category Acc: ", category_acc)
                        if args.use_tb:
                            log_writer.add_scalar('Train/mean_iou', mean_iou,
                                                  global_step)
                            log_writer.add_scalar('Train/mean_acc', mean_acc,
                                                  global_step)
                            log_writer.add_scalar('Train/loss', avg_loss,
                                                  global_step)
                            log_writer.add_scalar('Train/lr', lr[0],
                                                  global_step)
                            log_writer.add_scalar('Train/step/sec', speed,
                                                  global_step)
                        sys.stdout.flush()
                        avg_loss = 0.0
                        cm.zero_matrix()
                        timer.restart()
                else:
                    # If not in debug mode, avoid unnecessary logging and calculation
                    loss, lr = exe.run(program=compiled_train_prog,
                                       fetch_list=fetch_list,
                                       return_numpy=True)
                    avg_loss += np.mean(np.array(loss))
                    global_step += 1

                    if global_step % args.log_steps == 0 and cfg.TRAINER_ID == 0:
                        avg_loss /= args.log_steps
                        speed = args.log_steps / timer.elapsed_time()
                        print((
                            "epoch={} step={} lr={:.5f} loss={:.4f} step/sec={:.3f} | ETA {}"
                        ).format(epoch, global_step, lr[0], avg_loss, speed,
                                 calculate_eta(all_step - global_step, speed)))
                        if args.use_tb:
                            log_writer.add_scalar('Train/loss', avg_loss,
                                                  global_step)
                            log_writer.add_scalar('Train/lr', lr[0],
                                                  global_step)
                            log_writer.add_scalar('Train/speed', speed,
                                                  global_step)
                        sys.stdout.flush()
                        avg_loss = 0.0
                        timer.restart()

            except fluid.core.EOFException:
                py_reader.reset()
                break
            except Exception as e:
                print(e)

        if epoch % cfg.TRAIN.SNAPSHOT_EPOCH == 0 and cfg.TRAINER_ID == 0:
            ckpt_dir = save_checkpoint(exe, train_prog, epoch)

            if args.do_eval:
                print("Evaluation start")
                _, mean_iou, _, mean_acc = evaluate(cfg=cfg,
                                                    ckpt_dir=ckpt_dir,
                                                    use_gpu=args.use_gpu,
                                                    use_mpio=args.use_mpio)
                if args.use_tb:
                    log_writer.add_scalar('Evaluate/mean_iou', mean_iou,
                                          global_step)
                    log_writer.add_scalar('Evaluate/mean_acc', mean_acc,
                                          global_step)

            # Use Tensorboard to visualize results
            if args.use_tb and cfg.DATASET.VIS_FILE_LIST is not None:
                visualize(cfg=cfg,
                          use_gpu=args.use_gpu,
                          vis_file_list=cfg.DATASET.VIS_FILE_LIST,
                          vis_dir="visual",
                          ckpt_dir=ckpt_dir,
                          log_writer=log_writer)

    # save final model
    if cfg.TRAINER_ID == 0:
        save_checkpoint(exe, train_prog, 'final')
Example #3
def evaluate(cfg, ckpt_dir=None, use_gpu=False, use_mpio=False, **kwargs):
    np.set_printoptions(precision=5, suppress=True)

    startup_prog = fluid.Program()
    test_prog = fluid.Program()
    dataset = SegDataset(
        file_list=cfg.DATASET.VAL_FILE_LIST,
        mode=ModelPhase.EVAL,
        data_dir=cfg.DATASET.DATA_DIR)

    def data_generator():
        # TODO: check whether the batch reader is compatible with Windows
        if use_mpio:
            data_gen = dataset.multiprocess_generator(
                num_processes=cfg.DATALOADER.NUM_WORKERS,
                max_queue_size=cfg.DATALOADER.BUF_SIZE)
        else:
            data_gen = dataset.generator()

        for b in data_gen:
            yield b[0], b[1], b[2]

    data_loader, avg_loss, pred, grts, masks = build_model(
        test_prog, startup_prog, phase=ModelPhase.EVAL, arch=kwargs['arch'])

    data_loader.set_sample_generator(
        data_generator, drop_last=False, batch_size=cfg.BATCH_SIZE)

    # Get device environment
    places = fluid.cuda_places() if use_gpu else fluid.cpu_places()
    place = places[0]
    dev_count = len(places)
    print("#Device count: {}".format(dev_count))

    exe = fluid.Executor(place)
    exe.run(startup_prog)

    test_prog = test_prog.clone(for_test=True)

    ckpt_dir = cfg.TEST.TEST_MODEL if not ckpt_dir else ckpt_dir

    if not os.path.exists(ckpt_dir):
        raise ValueError('The TEST.TEST_MODEL {} is not found'.format(ckpt_dir))

    if ckpt_dir is not None:
        print('load test model:', ckpt_dir)
        try:
            fluid.load(test_prog, os.path.join(ckpt_dir, 'model'), exe)
        except Exception:
            fluid.io.load_params(exe, ckpt_dir, main_program=test_prog)

    # Use streaming confusion matrix to calculate mean_iou
    np.set_printoptions(
        precision=4, suppress=True, linewidth=160, floatmode="fixed")
    conf_mat = ConfusionMatrix(cfg.DATASET.NUM_CLASSES, streaming=True)
    fetch_list = [avg_loss.name, pred.name, grts.name, masks.name]
    num_images = 0
    step = 0
    all_step = cfg.DATASET.TEST_TOTAL_IMAGES // cfg.BATCH_SIZE + 1
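    # all_step only feeds the ETA display; the +1 rounds the estimate up so a
    # final partial batch (drop_last=False above) is still counted.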
    timer = Timer()
    timer.start()
    data_loader.start()
    while True:
        try:
            step += 1
            loss, pred, grts, masks = exe.run(
                test_prog, fetch_list=fetch_list, return_numpy=True)

            loss = np.mean(np.array(loss))

            num_images += pred.shape[0]
            conf_mat.calculate(pred, grts, masks)
            _, iou = conf_mat.mean_iou()
            _, acc = conf_mat.accuracy()

            speed = 1.0 / timer.elapsed_time()

            print(
                "[EVAL]step={} loss={:.5f} acc={:.4f} IoU={:.4f} step/sec={:.2f} | ETA {}"
                .format(step, loss, acc, iou, speed,
                        calculate_eta(all_step - step, speed)))
            timer.restart()
            sys.stdout.flush()
        except fluid.core.EOFException:
            break

    category_iou, avg_iou = conf_mat.mean_iou()
    category_acc, avg_acc = conf_mat.accuracy()
    print("[EVAL]#image={} acc={:.4f} IoU={:.4f}".format(
        num_images, avg_acc, avg_iou))
    print("[EVAL]Category IoU:", category_iou)
    print("[EVAL]Category Acc:", category_acc)
    print("[EVAL]Kappa:{:.4f}".format(conf_mat.kappa()))

    return category_iou, avg_iou, category_acc, avg_acc
Example #4
def train(cfg):
    startup_prog = fluid.Program()
    train_prog = fluid.Program()
    test_prog = fluid.Program()
    if args.enable_ce:
        startup_prog.random_seed = 1000
        train_prog.random_seed = 1000
    drop_last = True

    dataset = SegDataset(
        file_list=cfg.DATASET.TRAIN_FILE_LIST,
        mode=ModelPhase.TRAIN,
        shuffle=True,
        data_dir=cfg.DATASET.DATA_DIR)

    def data_generator():
        if args.use_mpio:
            data_gen = dataset.multiprocess_generator(
                num_processes=cfg.DATALOADER.NUM_WORKERS,
                max_queue_size=cfg.DATALOADER.BUF_SIZE)
        else:
            data_gen = dataset.generator()

        batch_data = []
        for b in data_gen:
            batch_data.append(b)
            if len(batch_data) == (cfg.BATCH_SIZE // cfg.NUM_TRAINERS):
                for item in batch_data:
                    yield item[0], item[1], item[2]
                batch_data = []
        # If the sync batch norm strategy is used, drop the last batch when the
        # number of samples in batch_data is less than cfg.BATCH_SIZE, to avoid
        # NCCL hang issues
        if not cfg.TRAIN.SYNC_BATCH_NORM:
            for item in batch_data:
                yield item[0], item[1], item[2]

    # Get device environment
    gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
    place = fluid.CUDAPlace(gpu_id) if args.use_gpu else fluid.CPUPlace()
    places = fluid.cuda_places() if args.use_gpu else fluid.cpu_places()

    # Get the number of GPUs
    dev_count = cfg.NUM_TRAINERS if cfg.NUM_TRAINERS > 1 else len(places)
    print_info("#Device count: {}".format(dev_count))

    # Make sure BATCH_SIZE is divisible by the number of GPU cards
    assert cfg.BATCH_SIZE % dev_count == 0, (
        'BATCH_SIZE:{} not divisible by number of GPUs:{}'.format(
            cfg.BATCH_SIZE, dev_count))
    # In multi-GPU training mode, batch data is allocated to each GPU evenly
    batch_size_per_dev = cfg.BATCH_SIZE // dev_count
    print_info("batch_size_per_dev: {}".format(batch_size_per_dev))

    data_loader, avg_loss, lr, pred, grts, masks = build_model(
        train_prog, startup_prog, phase=ModelPhase.TRAIN)
    build_model(test_prog, fluid.Program(), phase=ModelPhase.EVAL)
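    # A separate EVAL-phase program is built here (with a throwaway startup
    # program) so an inference program can be saved next to each checkpoint
    # via save_infer_program() below.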
    data_loader.set_sample_generator(
        data_generator, batch_size=batch_size_per_dev, drop_last=drop_last)

    exe = fluid.Executor(place)
    exe.run(startup_prog)

    exec_strategy = fluid.ExecutionStrategy()
    # Clear temporary variables every 100 iterations
    if args.use_gpu:
        exec_strategy.num_threads = fluid.core.get_cuda_device_count()
    exec_strategy.num_iteration_per_drop_scope = 100
    build_strategy = fluid.BuildStrategy()

    if cfg.NUM_TRAINERS > 1 and args.use_gpu:
        dist_utils.prepare_for_multi_process(exe, build_strategy, train_prog)
        exec_strategy.num_threads = 1

    if cfg.TRAIN.SYNC_BATCH_NORM and args.use_gpu:
        if dev_count > 1:
            # Apply sync batch norm strategy
            print_info("Sync BatchNorm strategy is effective.")
            build_strategy.sync_batch_norm = True
        else:
            print_info(
                "Sync BatchNorm strategy will not be effective if GPU device"
                " count <= 1")
    compiled_train_prog = fluid.CompiledProgram(train_prog).with_data_parallel(
        loss_name=avg_loss.name,
        exec_strategy=exec_strategy,
        build_strategy=build_strategy)

    # Resume training
    begin_epoch = cfg.SOLVER.BEGIN_EPOCH
    if cfg.TRAIN.RESUME_MODEL_DIR:
        begin_epoch = load_checkpoint(exe, train_prog)
    # Load pretrained model
    elif os.path.exists(cfg.TRAIN.PRETRAINED_MODEL_DIR):
        load_pretrained_weights(exe, train_prog, cfg.TRAIN.PRETRAINED_MODEL_DIR)
    else:
        print_info(
            'Pretrained model dir {} does not exist, training from scratch...'.
            format(cfg.TRAIN.PRETRAINED_MODEL_DIR))

    fetch_list = [avg_loss.name, lr.name]
    if args.debug:
        # Fetch more variable info and use streaming confusion matrix to
        # calculate IoU results if in debug mode
        np.set_printoptions(
            precision=4, suppress=True, linewidth=160, floatmode="fixed")
        fetch_list.extend([pred.name, grts.name, masks.name])
        cm = ConfusionMatrix(cfg.DATASET.NUM_CLASSES, streaming=True)

    if args.use_vdl:
        if not args.vdl_log_dir:
            print_info("Please specify the log directory by --vdl_log_dir.")
            exit(1)

        from visualdl import LogWriter
        log_writer = LogWriter(args.vdl_log_dir)

    # trainer_id = int(os.getenv("PADDLE_TRAINER_ID", 0))
    # num_trainers = int(os.environ.get('PADDLE_TRAINERS_NUM', 1))
    step = 0
    all_step = cfg.DATASET.TRAIN_TOTAL_IMAGES // cfg.BATCH_SIZE
    if cfg.DATASET.TRAIN_TOTAL_IMAGES % cfg.BATCH_SIZE and not drop_last:
        all_step += 1
    all_step *= (cfg.SOLVER.NUM_EPOCHS - begin_epoch + 1)

    avg_loss = 0.0
    best_mIoU = 0.0

    timer = Timer()
    timer.start()
    if begin_epoch > cfg.SOLVER.NUM_EPOCHS:
        raise ValueError(
            ("begin epoch[{}] is larger than cfg.SOLVER.NUM_EPOCHS[{}]").format(
                begin_epoch, cfg.SOLVER.NUM_EPOCHS))

    if args.use_mpio:
        print_info("Use multiprocess reader")
    else:
        print_info("Use multi-thread reader")

    for epoch in range(begin_epoch, cfg.SOLVER.NUM_EPOCHS + 1):
        data_loader.start()
        while True:
            try:
                if args.debug:
                    # Print category IoU and accuracy to check whether the
                    # training process behaves as expected
                    loss, lr, pred, grts, masks = exe.run(
                        program=compiled_train_prog,
                        fetch_list=fetch_list,
                        return_numpy=True)
                    cm.calculate(pred, grts, masks)
                    avg_loss += np.mean(np.array(loss))
                    step += 1

                    if step % args.log_steps == 0:
                        speed = args.log_steps / timer.elapsed_time()
                        avg_loss /= args.log_steps
                        category_acc, mean_acc = cm.accuracy()
                        category_iou, mean_iou = cm.mean_iou()

                        print_info((
                            "epoch={} step={} lr={:.5f} loss={:.4f} acc={:.5f} mIoU={:.5f} step/sec={:.3f} | ETA {}"
                        ).format(epoch, step, lr[0], avg_loss, mean_acc,
                                 mean_iou, speed,
                                 calculate_eta(all_step - step, speed)))
                        print_info("Category IoU: ", category_iou)
                        print_info("Category Acc: ", category_acc)
                        if args.use_vdl:
                            log_writer.add_scalar('Train/mean_iou', mean_iou,
                                                  step)
                            log_writer.add_scalar('Train/mean_acc', mean_acc,
                                                  step)
                            log_writer.add_scalar('Train/loss', avg_loss, step)
                            log_writer.add_scalar('Train/lr', lr[0], step)
                            log_writer.add_scalar('Train/step/sec', speed, step)
                        sys.stdout.flush()
                        avg_loss = 0.0
                        cm.zero_matrix()
                        timer.restart()
                else:
                    # If not in debug mode, avoid unnecessary logging and calculation
                    loss, lr = exe.run(
                        program=compiled_train_prog,
                        fetch_list=fetch_list,
                        return_numpy=True)
                    avg_loss += np.mean(np.array(loss))
                    step += 1

                    if step % args.log_steps == 0 and cfg.TRAINER_ID == 0:
                        avg_loss /= args.log_steps
                        speed = args.log_steps / timer.elapsed_time()
                        print((
                            "epoch={} step={} lr={:.5f} loss={:.4f} step/sec={:.3f} | ETA {}"
                        ).format(epoch, step, lr[0], avg_loss, speed,
                                 calculate_eta(all_step - step, speed)))
                        if args.use_vdl:
                            log_writer.add_scalar('Train/loss', avg_loss, step)
                            log_writer.add_scalar('Train/lr', lr[0], step)
                            log_writer.add_scalar('Train/speed', speed, step)
                        sys.stdout.flush()
                        avg_loss = 0.0
                        timer.restart()

                    # NOTE : used for benchmark, profiler tools
                    if args.is_profiler and epoch == 1 and step == args.log_steps:
                        profiler.start_profiler("All")
                    elif args.is_profiler and epoch == 1 and step == args.log_steps + 5:
                        profiler.stop_profiler("total", args.profiler_path)
                        return

            except fluid.core.EOFException:
                data_loader.reset()
                break
            except Exception as e:
                print(e)

        if (epoch % cfg.TRAIN.SNAPSHOT_EPOCH == 0
                or epoch == cfg.SOLVER.NUM_EPOCHS) and cfg.TRAINER_ID == 0:
            ckpt_dir = save_checkpoint(train_prog, epoch)
            save_infer_program(test_prog, ckpt_dir)

            if args.do_eval:
                print("Evaluation start")
                _, mean_iou, _, mean_acc = evaluate(
                    cfg=cfg,
                    ckpt_dir=ckpt_dir,
                    use_gpu=args.use_gpu,
                    use_mpio=args.use_mpio)
                if args.use_vdl:
                    log_writer.add_scalar('Evaluate/mean_iou', mean_iou, step)
                    log_writer.add_scalar('Evaluate/mean_acc', mean_acc, step)

                if mean_iou > best_mIoU:
                    best_mIoU = mean_iou
                    update_best_model(ckpt_dir)
                    print_info("Save best model {} to {}, mIoU = {:.4f}".format(
                        ckpt_dir,
                        os.path.join(cfg.TRAIN.MODEL_SAVE_DIR, 'best_model'),
                        mean_iou))

            # Use VisualDL to visualize results
            if args.use_vdl and cfg.DATASET.VIS_FILE_LIST is not None:
                visualize(
                    cfg=cfg,
                    use_gpu=args.use_gpu,
                    vis_file_list=cfg.DATASET.VIS_FILE_LIST,
                    vis_dir="visual",
                    ckpt_dir=ckpt_dir,
                    log_writer=log_writer)

    # save final model
    if cfg.TRAINER_ID == 0:
        ckpt_dir = save_checkpoint(train_prog, 'final')
        save_infer_program(test_prog, ckpt_dir)
Example #5
def visualize(cfg,
              vis_file_list=None,
              use_gpu=False,
              vis_dir="visual",
              ckpt_dir=None,
              log_writer=None,
              local_test=False,
              **kwargs):
    if vis_file_list is None:
        vis_file_list = cfg.DATASET.VIS_FILE_LIST
    dataset = SegDataset(file_list=vis_file_list,
                         mode=ModelPhase.VISUAL,
                         data_dir=cfg.DATASET.DATA_DIR)

    startup_prog = fluid.Program()
    test_prog = fluid.Program()
    pred, out = build_model(test_prog, startup_prog, phase=ModelPhase.VISUAL)
    # Clone forward graph
    test_prog = test_prog.clone(for_test=True)

    # Generate the full color map for a maximum of 256 classes
    color_map = get_color_map_list(256)

    # Get device environment
    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_prog)

    ckpt_dir = cfg.TEST.TEST_MODEL if not ckpt_dir else ckpt_dir

    if ckpt_dir is not None:
        print('load test model:', ckpt_dir)
        try:
            fluid.load(test_prog, os.path.join(ckpt_dir, 'model'), exe)
        except Exception:
            fluid.io.load_params(exe, ckpt_dir, main_program=test_prog)

    save_dir = vis_dir
    makedirs(save_dir)

    fetch_list = [pred.name, out.name]
    test_reader = dataset.batch(dataset.generator,
                                batch_size=cfg.TEST.BATCH_SIZE,
                                is_test=True)
    img_cnt = 0
    total_logit = None
    test_reader = tqdm(test_reader,
                       total=math.ceil(cfg.DATASET.VIS_TOTAL_IMAGES /
                                       cfg.TEST.BATCH_SIZE),
                       ncols=120)
    for imgs, grts, img_names, valid_shapes, org_shapes in test_reader:
        pred_shape = (imgs.shape[2], imgs.shape[3])
        pred, out = exe.run(program=test_prog,
                            feed={'image': imgs},
                            fetch_list=fetch_list,
                            return_numpy=True)
        # if total_logit is None:
        #     total_logit = out
        # else:
        #     total_logit = np.concatenate([total_logit, out])

        num_imgs = pred.shape[0]
        # TODO: use multi-thread to write images
        for i in range(num_imgs):
            # Squeeze the prediction for this image into a 2-D uint8 label map
            res_map = np.squeeze(pred[i, :, :, :]).astype(np.uint8)
            img_name = img_names[i]
            res_shape = (res_map.shape[0], res_map.shape[1])
            if res_shape[0] != pred_shape[0] or res_shape[1] != pred_shape[1]:
                res_map = cv2.resize(res_map,
                                     pred_shape,
                                     interpolation=cv2.INTER_NEAREST)
            valid_shape = (valid_shapes[i, 0], valid_shapes[i, 1])
            res_map = res_map[0:valid_shape[0], 0:valid_shape[1]]
            org_shape = (org_shapes[i, 0], org_shapes[i, 1])
            res_map = cv2.resize(res_map, (org_shape[1], org_shape[0]),
                                 interpolation=cv2.INTER_NEAREST)

            png_fn = to_png_fn(img_name)

            # Colorized segmentation result visualization
            vis_fn = os.path.join(save_dir, png_fn)
            dirname = os.path.dirname(vis_fn)
            makedirs(dirname)

            pred_mask = PILImage.fromarray(res_map.astype(np.uint8), mode='L')
            #pred_mask.putpalette(color_map)
            pred_mask.save(vis_fn)

            img_cnt += 1
            # print("#{} visualize image path: {}".format(img_cnt, vis_fn))

            # Use VisualDL to visualize image
            if log_writer is not None:
                # Calculate the epoch from the ckpt_dir folder name
                epoch = int(os.path.split(ckpt_dir)[-1])
                print("VisualDL visualization epoch", epoch)

                pred_mask_np = np.array(pred_mask.convert("RGB"))
                log_writer.add_image("Predict/{}".format(img_name),
                                     pred_mask_np, epoch)
                # Original image
                # BGR->RGB
                img = cv2.imread(os.path.join(cfg.DATASET.DATA_DIR,
                                              img_name))[..., ::-1]
                log_writer.add_image("Images/{}".format(img_name), img, epoch)
                # add ground truth (label) images
                grt = grts[i]
                if grt is not None:
                    grt = grt[0:valid_shape[0], 0:valid_shape[1]]
                    grt_pil = PILImage.fromarray(grt.astype(np.uint8),
                                                 mode='P')
                    grt_pil.putpalette(color_map)
                    grt_pil = grt_pil.resize((org_shape[1], org_shape[0]))
                    grt = np.array(grt_pil.convert("RGB"))
                    log_writer.add_image("Label/{}".format(img_name), grt,
                                         epoch)

        # In local_test mode, only visualize 5 images, just to exercise the
        # procedure
        if local_test and img_cnt >= 5:
            break
Example #6
def train(cfg):
    # startup_prog = fluid.Program()
    # train_prog = fluid.Program()

    drop_last = True

    dataset = SegDataset(
        file_list=cfg.DATASET.TRAIN_FILE_LIST,
        mode=ModelPhase.TRAIN,
        shuffle=True,
        data_dir=cfg.DATASET.DATA_DIR)

    def data_generator():
        if args.use_mpio:
            data_gen = dataset.multiprocess_generator(
                num_processes=cfg.DATALOADER.NUM_WORKERS,
                max_queue_size=cfg.DATALOADER.BUF_SIZE)
        else:
            data_gen = dataset.generator()

        batch_data = []
        for b in data_gen:
            batch_data.append(b)
            if len(batch_data) == (cfg.BATCH_SIZE // cfg.NUM_TRAINERS):
                for item in batch_data:
                    yield item[0], item[1], item[2]
                batch_data = []
        # If the sync batch norm strategy is used, drop the last batch when the
        # number of samples in batch_data is less than cfg.BATCH_SIZE, to avoid
        # NCCL hang issues
        if not cfg.TRAIN.SYNC_BATCH_NORM:
            for item in batch_data:
                yield item[0], item[1], item[2]

    # Get device environment
    # places = fluid.cuda_places() if args.use_gpu else fluid.cpu_places()
    # place = places[0]
    gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
    place = fluid.CUDAPlace(gpu_id) if args.use_gpu else fluid.CPUPlace()
    places = fluid.cuda_places() if args.use_gpu else fluid.cpu_places()

    # Get the number of GPUs
    dev_count = cfg.NUM_TRAINERS if cfg.NUM_TRAINERS > 1 else len(places)
    print_info("#Device count: {}".format(dev_count))

    # Make sure BATCH_SIZE is divisible by the number of GPU cards
    assert cfg.BATCH_SIZE % dev_count == 0, (
        'BATCH_SIZE:{} not divisible by number of GPUs:{}'.format(
            cfg.BATCH_SIZE, dev_count))
    # In multi-GPU training mode, batch data is allocated to each GPU evenly
    batch_size_per_dev = cfg.BATCH_SIZE // dev_count
    print_info("batch_size_per_dev: {}".format(batch_size_per_dev))

    data_loader, loss, lr, pred, grts, masks, image = build_model(
        phase=ModelPhase.TRAIN)
    data_loader.set_sample_generator(
        data_generator, batch_size=batch_size_per_dev, drop_last=drop_last)

    exe = fluid.Executor(place)

    cfg.update_from_file(args.teacher_cfg_file)
    # teacher_arch = teacher_cfg.architecture
    teacher_program = fluid.Program()
    teacher_startup_program = fluid.Program()

    with fluid.program_guard(teacher_program, teacher_startup_program):
        with fluid.unique_name.guard():
            _, teacher_loss, _, _, _, _, _ = build_model(
                teacher_program,
                teacher_startup_program,
                phase=ModelPhase.TRAIN,
                image=image,
                label=grts,
                mask=masks)
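
    # The teacher network is built in its own Program under unique_name.guard
    # so its parameter names do not collide with the student's; it reuses the
    # student's input variables (image, grts, masks) so both networks see the
    # same batch.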

    exe.run(teacher_startup_program)

    teacher_program = teacher_program.clone(for_test=True)
    ckpt_dir = cfg.SLIM.KNOWLEDGE_DISTILL_TEACHER_MODEL_DIR
    assert ckpt_dir is not None
    print('load teacher model:', ckpt_dir)
    if os.path.exists(ckpt_dir):
        try:
            fluid.load(teacher_program, os.path.join(ckpt_dir, 'model'), exe)
        except Exception:
            fluid.io.load_params(exe, ckpt_dir, main_program=teacher_program)

    # cfg = load_config(FLAGS.config)
    cfg.update_from_file(args.cfg_file)
    data_name_map = {
        'image': 'image',
        'label': 'label',
        'mask': 'mask',
    }
    merge(teacher_program, fluid.default_main_program(), data_name_map, place)
    distill_pairs = [[
        'teacher_bilinear_interp_2.tmp_0', 'bilinear_interp_0.tmp_0'
    ]]
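    # Note: merge() is assumed to behave like PaddleSlim's distillation merge
    # API, copying the teacher program's ops into the current default main
    # program and prefixing teacher variable names with 'teacher_'. Each entry
    # in distill_pairs is then [teacher_feature_name, student_feature_name];
    # the *.tmp_0 names above are assumed to exist in the merged graph.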

    def distill(pairs, weight):
        """
        Compute the L2 distillation loss between the first teacher/student
        feature-map pair and scale it by the given weight.
        """
        loss = l2_loss(pairs[0][0], pairs[0][1])
        weighted_loss = loss * weight
        return weighted_loss

    distill_loss = distill(distill_pairs, 0.1)
    cfg.update_from_file(args.cfg_file)
    optimizer = solver.Solver(None, None)
    all_loss = loss + distill_loss
    lr = optimizer.optimise(all_loss)
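    # The combined objective is all_loss = student segmentation loss +
    # 0.1 * L2(teacher feature, student feature); the solver builds its
    # optimizer on this sum and returns the learning-rate variable fetched
    # during training below.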

    exe.run(fluid.default_startup_program())

    exec_strategy = fluid.ExecutionStrategy()
    # Clear temporary variables every 100 iterations
    if args.use_gpu:
        exec_strategy.num_threads = fluid.core.get_cuda_device_count()
    exec_strategy.num_iteration_per_drop_scope = 100
    build_strategy = fluid.BuildStrategy()
    build_strategy.fuse_all_reduce_ops = False
    build_strategy.fuse_all_optimizer_ops = False
    build_strategy.fuse_elewise_add_act_ops = True
    if cfg.NUM_TRAINERS > 1 and args.use_gpu:
        dist_utils.prepare_for_multi_process(exe, build_strategy,
                                             fluid.default_main_program())
        exec_strategy.num_threads = 1

    if cfg.TRAIN.SYNC_BATCH_NORM and args.use_gpu:
        if dev_count > 1:
            # Apply sync batch norm strategy
            print_info("Sync BatchNorm strategy is effective.")
            build_strategy.sync_batch_norm = True
        else:
            print_info(
                "Sync BatchNorm strategy will not be effective if GPU device"
                " count <= 1")
    compiled_train_prog = fluid.CompiledProgram(
        fluid.default_main_program()).with_data_parallel(
            loss_name=all_loss.name,
            exec_strategy=exec_strategy,
            build_strategy=build_strategy)

    # Resume training
    begin_epoch = cfg.SOLVER.BEGIN_EPOCH
    if cfg.TRAIN.RESUME_MODEL_DIR:
        begin_epoch = load_checkpoint(exe, fluid.default_main_program())
    # Load pretrained model
    elif os.path.exists(cfg.TRAIN.PRETRAINED_MODEL_DIR):
        load_pretrained_weights(exe, fluid.default_main_program(),
                                cfg.TRAIN.PRETRAINED_MODEL_DIR)
    else:
        print_info(
            'Pretrained model dir {} does not exist, training from scratch...'.
            format(cfg.TRAIN.PRETRAINED_MODEL_DIR))

    #fetch_list = [avg_loss.name, lr.name]
    fetch_list = [
        loss.name, 'teacher_' + teacher_loss.name, distill_loss.name, lr.name
    ]

    if args.debug:
        # Fetch more variable info and use streaming confusion matrix to
        # calculate IoU results if in debug mode
        np.set_printoptions(
            precision=4, suppress=True, linewidth=160, floatmode="fixed")
        fetch_list.extend([pred.name, grts.name, masks.name])
        cm = ConfusionMatrix(cfg.DATASET.NUM_CLASSES, streaming=True)

    if args.use_vdl:
        if not args.vdl_log_dir:
            print_info("Please specify the log directory by --vdl_log_dir.")
            exit(1)

        from visualdl import LogWriter
        log_writer = LogWriter(args.vdl_log_dir)

    # trainer_id = int(os.getenv("PADDLE_TRAINER_ID", 0))
    # num_trainers = int(os.environ.get('PADDLE_TRAINERS_NUM', 1))
    step = 0
    all_step = cfg.DATASET.TRAIN_TOTAL_IMAGES // cfg.BATCH_SIZE
    if cfg.DATASET.TRAIN_TOTAL_IMAGES % cfg.BATCH_SIZE and not drop_last:
        all_step += 1
    all_step *= (cfg.SOLVER.NUM_EPOCHS - begin_epoch + 1)

    avg_loss = 0.0
    avg_t_loss = 0.0
    avg_d_loss = 0.0
    best_mIoU = 0.0

    timer = Timer()
    timer.start()
    if begin_epoch > cfg.SOLVER.NUM_EPOCHS:
        raise ValueError(
            ("begin epoch[{}] is larger than cfg.SOLVER.NUM_EPOCHS[{}]").format(
                begin_epoch, cfg.SOLVER.NUM_EPOCHS))

    if args.use_mpio:
        print_info("Use multiprocess reader")
    else:
        print_info("Use multi-thread reader")

    for epoch in range(begin_epoch, cfg.SOLVER.NUM_EPOCHS + 1):
        data_loader.start()
        while True:
            try:
                if args.debug:
                    # Print category IoU and accuracy to check whether the
                    # training process behaves as expected
                    loss, lr, pred, grts, masks = exe.run(
                        program=compiled_train_prog,
                        fetch_list=fetch_list,
                        return_numpy=True)
                    cm.calculate(pred, grts, masks)
                    avg_loss += np.mean(np.array(loss))
                    step += 1

                    if step % args.log_steps == 0:
                        speed = args.log_steps / timer.elapsed_time()
                        avg_loss /= args.log_steps
                        category_acc, mean_acc = cm.accuracy()
                        category_iou, mean_iou = cm.mean_iou()

                        print_info((
                            "epoch={} step={} lr={:.5f} loss={:.4f} acc={:.5f} mIoU={:.5f} step/sec={:.3f} | ETA {}"
                        ).format(epoch, step, lr[0], avg_loss, mean_acc,
                                 mean_iou, speed,
                                 calculate_eta(all_step - step, speed)))
                        print_info("Category IoU: ", category_iou)
                        print_info("Category Acc: ", category_acc)
                        if args.use_vdl:
                            log_writer.add_scalar('Train/mean_iou', mean_iou,
                                                  step)
                            log_writer.add_scalar('Train/mean_acc', mean_acc,
                                                  step)
                            log_writer.add_scalar('Train/loss', avg_loss, step)
                            log_writer.add_scalar('Train/lr', lr[0], step)
                            log_writer.add_scalar('Train/step/sec', speed, step)
                        sys.stdout.flush()
                        avg_loss = 0.0
                        cm.zero_matrix()
                        timer.restart()
                else:
                    # If not in debug mode, avoid unnecessary logging and calculation
                    loss, t_loss, d_loss, lr = exe.run(
                        program=compiled_train_prog,
                        fetch_list=fetch_list,
                        return_numpy=True)
                    avg_loss += np.mean(np.array(loss))
                    avg_t_loss += np.mean(np.array(t_loss))
                    avg_d_loss += np.mean(np.array(d_loss))
                    step += 1

                    if step % args.log_steps == 0 and cfg.TRAINER_ID == 0:
                        avg_loss /= args.log_steps
                        avg_t_loss /= args.log_steps
                        avg_d_loss /= args.log_steps
                        speed = args.log_steps / timer.elapsed_time()
                        print((
                            "epoch={} step={} lr={:.5f} loss={:.4f} teacher loss={:.4f} distill loss={:.4f} step/sec={:.3f} | ETA {}"
                        ).format(epoch, step, lr[0], avg_loss, avg_t_loss,
                                 avg_d_loss, speed,
                                 calculate_eta(all_step - step, speed)))
                        if args.use_vdl:
                            log_writer.add_scalar('Train/loss', avg_loss, step)
                            log_writer.add_scalar('Train/lr', lr[0], step)
                            log_writer.add_scalar('Train/speed', speed, step)
                        sys.stdout.flush()
                        avg_loss = 0.0
                        avg_t_loss = 0.0
                        avg_d_loss = 0.0
                        timer.restart()

            except fluid.core.EOFException:
                data_loader.reset()
                break
            except Exception as e:
                print(e)

        if (epoch % cfg.TRAIN.SNAPSHOT_EPOCH == 0
                or epoch == cfg.SOLVER.NUM_EPOCHS) and cfg.TRAINER_ID == 0:
            ckpt_dir = save_checkpoint(fluid.default_main_program(), epoch)

            if args.do_eval:
                print("Evaluation start")
                _, mean_iou, _, mean_acc = evaluate(
                    cfg=cfg,
                    ckpt_dir=ckpt_dir,
                    use_gpu=args.use_gpu,
                    use_mpio=args.use_mpio)
                if args.use_vdl:
                    log_writer.add_scalar('Evaluate/mean_iou', mean_iou, step)
                    log_writer.add_scalar('Evaluate/mean_acc', mean_acc, step)

                if mean_iou > best_mIoU:
                    best_mIoU = mean_iou
                    update_best_model(ckpt_dir)
                    print_info("Save best model {} to {}, mIoU = {:.4f}".format(
                        ckpt_dir,
                        os.path.join(cfg.TRAIN.MODEL_SAVE_DIR, 'best_model'),
                        mean_iou))

            # Use VisualDL to visualize results
            if args.use_vdl and cfg.DATASET.VIS_FILE_LIST is not None:
                visualize(
                    cfg=cfg,
                    use_gpu=args.use_gpu,
                    vis_file_list=cfg.DATASET.VIS_FILE_LIST,
                    vis_dir="visual",
                    ckpt_dir=ckpt_dir,
                    log_writer=log_writer)
        if cfg.TRAINER_ID == 0:
            ckpt_dir = save_checkpoint(fluid.default_main_program(), epoch)

    # save final model
    if cfg.TRAINER_ID == 0:
        save_checkpoint(fluid.default_main_program(), 'final')
Example #7
def train(cfg):
    startup_prog = fluid.Program()
    train_prog = fluid.Program()
    if args.enable_ce:
        startup_prog.random_seed = 1000
        train_prog.random_seed = 1000
    drop_last = True

    dataset = SegDataset(file_list=cfg.DATASET.TRAIN_FILE_LIST,
                         mode=ModelPhase.TRAIN,
                         shuffle=True,
                         data_dir=cfg.DATASET.DATA_DIR)

    def data_generator():
        if args.use_mpio:
            data_gen = dataset.multiprocess_generator(
                num_processes=cfg.DATALOADER.NUM_WORKERS,
                max_queue_size=cfg.DATALOADER.BUF_SIZE)
        else:
            data_gen = dataset.generator()

        batch_data = []
        for b in data_gen:
            batch_data.append(b)
            if len(batch_data) == (cfg.BATCH_SIZE // cfg.NUM_TRAINERS):
                for item in batch_data:
                    yield item[0], item[1], item[2]
                batch_data = []
        # If the sync batch norm strategy is used, drop the last batch when the
        # number of samples in batch_data is less than cfg.BATCH_SIZE, to avoid
        # NCCL hang issues
        if not cfg.TRAIN.SYNC_BATCH_NORM:
            for item in batch_data:
                yield item[0], item[1], item[2]

    # Get device environment
    # places = fluid.cuda_places() if args.use_gpu else fluid.cpu_places()
    # place = places[0]
    gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
    place = fluid.CUDAPlace(gpu_id) if args.use_gpu else fluid.CPUPlace()
    places = fluid.cuda_places() if args.use_gpu else fluid.cpu_places()

    # Get the number of GPUs
    dev_count = cfg.NUM_TRAINERS if cfg.NUM_TRAINERS > 1 else len(places)
    print_info("#Device count: {}".format(dev_count))

    # Make sure BATCH_SIZE is divisible by the number of GPU cards
    assert cfg.BATCH_SIZE % dev_count == 0, (
        'BATCH_SIZE:{} not divisible by number of GPUs:{}'.format(
            cfg.BATCH_SIZE, dev_count))
    # In multi-GPU training mode, batch data is allocated to each GPU evenly
    batch_size_per_dev = cfg.BATCH_SIZE // dev_count
    print_info("batch_size_per_dev: {}".format(batch_size_per_dev))

    config_info = {'input_size': 769, 'output_size': 1, 'block_num': 7}
    config = ([(cfg.SLIM.NAS_SPACE_NAME, config_info)])
    factory = SearchSpaceFactory()
    space = factory.get_search_space(config)

    port = cfg.SLIM.NAS_PORT
    server_address = (cfg.SLIM.NAS_ADDRESS, port)
    sa_nas = SANAS(config,
                   server_addr=server_address,
                   search_steps=cfg.SLIM.NAS_SEARCH_STEPS,
                   is_server=cfg.SLIM.NAS_IS_SERVER)
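    # SANAS (assumed to be PaddleSlim's simulated-annealing NAS controller)
    # serves candidate architectures from NAS_ADDRESS:NAS_PORT; next_archs()
    # below returns the next candidate to train, and sa_nas.reward() reports
    # its best validation mIoU back to the controller.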
    for step in range(cfg.SLIM.NAS_SEARCH_STEPS):
        arch = sa_nas.next_archs()[0]

        start_prog = fluid.Program()
        train_prog = fluid.Program()

        data_loader, avg_loss, lr, pred, grts, masks = build_model(
            train_prog, start_prog, arch=arch, phase=ModelPhase.TRAIN)

        cur_flops = flops(train_prog)
        print('current step:', step, 'flops:', cur_flops)

        data_loader.set_sample_generator(data_generator,
                                         batch_size=batch_size_per_dev,
                                         drop_last=drop_last)

        exe = fluid.Executor(place)
        exe.run(start_prog)

        exec_strategy = fluid.ExecutionStrategy()
        # Clear temporary variables every 100 iterations
        if args.use_gpu:
            exec_strategy.num_threads = fluid.core.get_cuda_device_count()
        exec_strategy.num_iteration_per_drop_scope = 100
        build_strategy = fluid.BuildStrategy()

        if cfg.NUM_TRAINERS > 1 and args.use_gpu:
            dist_utils.prepare_for_multi_process(exe, build_strategy,
                                                 train_prog)
            exec_strategy.num_threads = 1

        if cfg.TRAIN.SYNC_BATCH_NORM and args.use_gpu:
            if dev_count > 1:
                # Apply sync batch norm strategy
                print_info("Sync BatchNorm strategy is effective.")
                build_strategy.sync_batch_norm = True
            else:
                print_info(
                    "Sync BatchNorm strategy will not be effective if GPU device"
                    " count <= 1")
        compiled_train_prog = fluid.CompiledProgram(
            train_prog).with_data_parallel(loss_name=avg_loss.name,
                                           exec_strategy=exec_strategy,
                                           build_strategy=build_strategy)

        # Resume training
        begin_epoch = cfg.SOLVER.BEGIN_EPOCH
        if cfg.TRAIN.RESUME_MODEL_DIR:
            begin_epoch = load_checkpoint(exe, train_prog)
        # Load pretrained model
        elif os.path.exists(cfg.TRAIN.PRETRAINED_MODEL_DIR):
            print_info('Pretrained model dir: ',
                       cfg.TRAIN.PRETRAINED_MODEL_DIR)
            load_vars = []
            load_fail_vars = []

            def var_shape_matched(var, shape):
                """
                Check whether the persistable variable's shape matches the current network
                """
                var_exist = os.path.exists(
                    os.path.join(cfg.TRAIN.PRETRAINED_MODEL_DIR, var.name))
                if var_exist:
                    var_shape = parse_shape_from_file(
                        os.path.join(cfg.TRAIN.PRETRAINED_MODEL_DIR, var.name))
                    return var_shape == shape
                return False

            for x in train_prog.list_vars():
                if isinstance(x, fluid.framework.Parameter):
                    shape = tuple(fluid.global_scope().find_var(
                        x.name).get_tensor().shape())
                    if var_shape_matched(x, shape):
                        load_vars.append(x)
                    else:
                        load_fail_vars.append(x)

            fluid.io.load_vars(exe,
                               dirname=cfg.TRAIN.PRETRAINED_MODEL_DIR,
                               vars=load_vars)
            for var in load_vars:
                print_info("Parameter[{}] loaded sucessfully!".format(
                    var.name))
            for var in load_fail_vars:
                print_info(
                    "Parameter[{}] don't exist or shape does not match current network, skip"
                    " to load it.".format(var.name))
            print_info(
                "{}/{} pretrained parameters loaded successfully!".format(
                    len(load_vars),
                    len(load_vars) + len(load_fail_vars)))
        else:
            print_info(
                'Pretrained model dir {} does not exist, training from scratch...'.
                format(cfg.TRAIN.PRETRAINED_MODEL_DIR))

        fetch_list = [avg_loss.name, lr.name]

        global_step = 0
        all_step = cfg.DATASET.TRAIN_TOTAL_IMAGES // cfg.BATCH_SIZE
        if cfg.DATASET.TRAIN_TOTAL_IMAGES % cfg.BATCH_SIZE and not drop_last:
            all_step += 1
        all_step *= (cfg.SOLVER.NUM_EPOCHS - begin_epoch + 1)

        avg_loss = 0.0
        timer = Timer()
        timer.start()
        if begin_epoch > cfg.SOLVER.NUM_EPOCHS:
            raise ValueError(
                ("begin epoch[{}] is larger than cfg.SOLVER.NUM_EPOCHS[{}]"
                 ).format(begin_epoch, cfg.SOLVER.NUM_EPOCHS))

        if args.use_mpio:
            print_info("Use multiprocess reader")
        else:
            print_info("Use multi-thread reader")

        best_miou = 0.0
        for epoch in range(begin_epoch, cfg.SOLVER.NUM_EPOCHS + 1):
            data_loader.start()
            while True:
                try:
                    loss, lr = exe.run(program=compiled_train_prog,
                                       fetch_list=fetch_list,
                                       return_numpy=True)
                    avg_loss += np.mean(np.array(loss))
                    global_step += 1

                    if global_step % args.log_steps == 0 and cfg.TRAINER_ID == 0:
                        avg_loss /= args.log_steps
                        speed = args.log_steps / timer.elapsed_time()
                        print((
                            "epoch={} step={} lr={:.5f} loss={:.4f} step/sec={:.3f} | ETA {}"
                        ).format(epoch, global_step, lr[0], avg_loss, speed,
                                 calculate_eta(all_step - global_step, speed)))

                        sys.stdout.flush()
                        avg_loss = 0.0
                        timer.restart()

                except fluid.core.EOFException:
                    data_loader.reset()
                    break
                except Exception as e:
                    # Log unexpected errors but keep consuming the data loader
                    print(e)
            if epoch > cfg.SLIM.NAS_START_EVAL_EPOCH:
                ckpt_dir = save_checkpoint(exe, train_prog,
                                           '{}_tmp'.format(port))
                _, mean_iou, _, mean_acc = evaluate(cfg=cfg,
                                                    arch=arch,
                                                    ckpt_dir=ckpt_dir,
                                                    use_gpu=args.use_gpu,
                                                    use_mpio=args.use_mpio)
                if best_miou < mean_iou:
                    print('search step {}, epoch {} best iou {}'.format(
                        step, epoch, mean_iou))
                    best_miou = mean_iou

        sa_nas.reward(float(best_miou))
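
A note on the ETA shown in the training log above: it is produced by calculate_eta from the remaining step count and the measured steps per second. A minimal sketch of such a helper (an illustration under that assumption, not necessarily the project's exact utility):

def calculate_eta(remain_step, speed):
    # Convert remaining steps and steps/sec into an "hh:mm:ss" string.
    if speed <= 0:
        return "--:--:--"
    remain_time = int(remain_step / speed)
    hours, remainder = divmod(remain_time, 3600)
    minutes, seconds = divmod(remainder, 60)
    return "{:0>2}:{:0>2}:{:0>2}".format(hours, minutes, seconds)
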

def evaluate(cfg,
             ckpt_dir=None,
             use_gpu=False,
             vis=False,
             vis_dir='vis_out/test_public',
             use_mpio=False,
             **kwargs):
    np.set_printoptions(precision=5, suppress=True)

    startup_prog = fluid.Program()
    test_prog = fluid.Program()
    dataset = SegDataset(
        file_list=cfg.DATASET.VAL_FILE_LIST,
        mode=ModelPhase.EVAL,
        data_dir=cfg.DATASET.DATA_DIR)

    fls = []
    with open(cfg.DATASET.VAL_FILE_LIST) as fr:
        for line in fr.readlines():
            fls.append(line.strip().split(' ')[0])
    if vis:
        assert cfg.VIS.VISINEVAL is True
        if not os.path.exists(vis_dir):
            os.makedirs(vis_dir)

    def data_generator():
        # TODO: check whether the batch reader is compatible with Windows
        if use_mpio:
            data_gen = dataset.multiprocess_generator(
                num_processes=cfg.DATALOADER.NUM_WORKERS,
                max_queue_size=cfg.DATALOADER.BUF_SIZE)
        else:
            data_gen = dataset.generator()

        for b in data_gen:
            if cfg.DATASET.INPUT_IMAGE_NUM == 1:
                yield b[0], b[1], b[2]
            else:
                yield b[0], b[1], b[2], b[3]

    data_loader, avg_loss, pred, grts, masks = build_model(
        test_prog, startup_prog, phase=ModelPhase.EVAL)

    data_loader.set_sample_generator(
        data_generator, drop_last=False, batch_size=cfg.BATCH_SIZE)

    # Get device environment
    places = fluid.cuda_places() if use_gpu else fluid.cpu_places()
    place = places[0]
    dev_count = len(places)
    print("#Device count: {}".format(dev_count))

    exe = fluid.Executor(place)
    exe.run(startup_prog)
    test_prog = test_prog.clone(for_test=True)

    ckpt_dir = cfg.TEST.TEST_MODEL if not ckpt_dir else ckpt_dir

    if not os.path.exists(ckpt_dir):
        raise ValueError('The TEST.TEST_MODEL {} does not exist'.format(ckpt_dir))

    if ckpt_dir is not None:
        print('load test model:', ckpt_dir)
        try:
            fluid.load(test_prog, os.path.join(ckpt_dir, 'model'), exe)
        except Exception:
            # Fall back to the legacy parameter loader for older checkpoints
            fluid.io.load_params(exe, ckpt_dir, main_program=test_prog)

    # Use streaming confusion matrix to calculate mean_iou
    np.set_printoptions(
        precision=4, suppress=True, linewidth=160, floatmode="fixed")
    class_num = cfg.DATASET.NUM_CLASSES
    conf_mat = ConfusionMatrix(class_num, streaming=True)
    fetch_list = [avg_loss.name, pred.name, grts.name, masks.name]
    num_images = 0
    step = 0
    all_step = cfg.DATASET.TEST_TOTAL_IMAGES // cfg.BATCH_SIZE + 1
    timer = Timer()
    timer.start()
    data_loader.start()
    cnt = 0
    while True:
        try:
            step += 1
            loss, pred, grts, masks = exe.run(
                test_prog, fetch_list=fetch_list, return_numpy=True)
            if vis:
                preds = np.array(pred, dtype=np.float32)
                for j in range(preds.shape[0]):
                    # Guard against running past the end of the file list
                    if cnt >= len(fls): continue
                    name = fls[cnt].split('/')[-1].split('.')[0]
                    p = np.squeeze(preds[j])
                    np.save(os.path.join(vis_dir, name + '.npy'), p)
                    cnt += 1
                print('vis %d npy... (%d tif samples)' % (cnt, cnt // 36))
                continue

            loss = np.mean(np.array(loss))

            num_images += pred.shape[0]
            conf_mat.calculate(pred, grts, masks)
            _, iou = conf_mat.mean_iou()
            _, acc = conf_mat.accuracy()
            fwiou = conf_mat.frequency_weighted_iou()

            speed = 1.0 / timer.elapsed_time()

            print(
                "[EVAL]step={} loss={:.5f} acc={:.4f} IoU={:.4f} FWIoU={:.4f} step/sec={:.2f} | ETA {}"
                .format(step, loss, acc, iou, fwiou, speed, calculate_eta(all_step - step, speed)))
            timer.restart()
            sys.stdout.flush()
        except fluid.core.EOFException:
            break

    if vis:
        return

    category_iou, avg_iou = conf_mat.mean_iou()
    category_acc, avg_acc = conf_mat.accuracy()
    fwiou = conf_mat.frequency_weighted_iou()
    print("[EVAL]#image={} acc={:.4f} IoU={:.4f} FWIoU={:.4f}".format(
        num_images, avg_acc, avg_iou, fwiou))
    print("[EVAL]Category Acc:", category_acc)
    print("[EVAL]Category IoU:", category_iou)
    print("[EVAL]Kappa: {:.4f}".format(conf_mat.kappa()))

    return category_iou, avg_iou, category_acc, avg_acc
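
The metrics returned above are accumulated by the streaming ConfusionMatrix utility. As a rough illustration of what it computes, the sketch below derives per-class IoU and mean IoU from a plain class_num x class_num numpy matrix (the function name and layout are assumptions for illustration, not the actual helper):

def miou_from_confusion_matrix(cm):
    # cm[i, j] counts pixels with ground-truth class i predicted as class j.
    tp = np.diag(cm).astype(np.float64)
    fp = cm.sum(axis=0) - tp  # predicted as the class, but wrong
    fn = cm.sum(axis=1) - tp  # belongs to the class, but missed
    denom = tp + fp + fn
    iou = np.where(denom > 0, tp / np.maximum(denom, 1), 0.0)
    return iou, float(iou.mean())
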
def visualize(cfg,
              vis_file_list=None,
              use_gpu=False,
              vis_dir="show",
              ckpt_dir=None,
              log_writer=None,
              local_test=False,
              **kwargs):
    if vis_file_list is None:
        vis_file_list = cfg.DATASET.VIS_FILE_LIST
    dataset = SegDataset(file_list=vis_file_list,
                         mode=ModelPhase.VISUAL,
                         data_dir=cfg.DATASET.DATA_DIR)

    startup_prog = fluid.Program()
    test_prog = fluid.Program()
    pred, logit, out = build_model(test_prog,
                                   startup_prog,
                                   phase=ModelPhase.VISUAL)
    # Clone forward graph
    test_prog = test_prog.clone(for_test=True)

    # Generate a full colormap for up to 256 classes
    color_map = get_color_map_list(256)

    # Get device environment
    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_prog)

    ckpt_dir = cfg.TEST.TEST_MODEL if not ckpt_dir else ckpt_dir

    if ckpt_dir is not None:
        print('load test model:', ckpt_dir)
        try:
            fluid.load(test_prog, os.path.join(ckpt_dir, 'model'), exe)
        except Exception:
            # Fall back to the legacy parameter loader for older checkpoints
            fluid.io.load_params(exe, ckpt_dir, main_program=test_prog)

    save_dir = "show"
    makedirs(save_dir)

    fetch_list = [pred.name, logit.name]
    test_reader = dataset.batch(dataset.generator, batch_size=1, is_test=True)
    img_cnt = 0
    for imgs, grts, img_names, valid_shapes, org_shapes in test_reader:
        pred_shape = (imgs.shape[2], imgs.shape[3])
        pred, logit = exe.run(program=test_prog,
                              feed={'image': imgs},
                              fetch_list=fetch_list,
                              return_numpy=True)

        num_imgs = pred.shape[0]
        # TODO: use multi-thread to write images
        for i in range(num_imgs):
            # Squeeze the prediction of sample i into a 2-D uint8 label map
            res_map = np.squeeze(pred[i, :, :, :]).astype(np.uint8)
            img_name = img_names[i]
            res_shape = (res_map.shape[0], res_map.shape[1])
            if res_shape[0] != pred_shape[0] or res_shape[1] != pred_shape[1]:
                res_map = cv2.resize(res_map,
                                     pred_shape,
                                     interpolation=cv2.INTER_NEAREST)
            valid_shape = (valid_shapes[i, 0], valid_shapes[i, 1])
            res_map = res_map[0:valid_shape[0], 0:valid_shape[1]]
            org_shape = (org_shapes[i, 0], org_shapes[i, 1])
            res_map = cv2.resize(res_map, (org_shape[1], org_shape[0]),
                                 interpolation=cv2.INTER_NEAREST)

            png_fn = to_png_fn(img_name)
            # colorful segment result visualization
            vis_fn = os.path.join(save_dir, png_fn)
            dirname = os.path.dirname(vis_fn)
            makedirs(dirname)

            pred_mask = PILImage.fromarray(res_map.astype(np.uint8), mode='L')
            pred_mask.putpalette(color_map)
            # pred_mask.save(vis_fn)

            pred_mask_np = np.array(pred_mask.convert("RGB"))
            im_pred = PILImage.fromarray(pred_mask_np)

            # Original image: OpenCV loads BGR, convert to RGB once before
            # wrapping it in a PIL image
            img = cv2.imread(os.path.join(cfg.DATASET.DATA_DIR, img_name))
            im_ori = PILImage.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
            # log_writer.add_image("Images/{}".format(img_name), img, epoch)
            # add ground truth (label) images

            im_pred_cat = PILImage.blend(im_ori, im_pred, 0.5)
            im_ori = join(im_ori, im_ori, flag="vertical")
            im_pred_cat = join(im_pred_cat, im_pred, flag="vertical")
            new_img = join(im_ori, im_pred_cat)
            new_img.save(vis_fn)

            img_cnt += 1
            print("#{} show image path: {}".format(img_cnt, vis_fn))
Example #10
0
def visualize(cfg,
              vis_file_list=None,
              use_gpu=False,
              vis_dir="visual",
              ckpt_dir=None,
              log_writer=None,
              local_test=False,
              **kwargs):
    if vis_file_list is None:
        vis_file_list = cfg.DATASET.TEST_FILE_LIST
    dataset = SegDataset(file_list=vis_file_list,
                         mode=ModelPhase.VISUAL,
                         data_dir=cfg.DATASET.DATA_DIR)

    startup_prog = fluid.Program()
    test_prog = fluid.Program()
    pred, logit = build_model(test_prog, startup_prog, phase=ModelPhase.VISUAL)
    # Clone forward graph
    test_prog = test_prog.clone(for_test=True)

    # Generate a colormap large enough for the class-pair (change detection)
    # encoding below, capped at 256 entries
    color_map = get_color_map_list(cfg.DATASET.NUM_CLASSES**2 if
                                   cfg.DATASET.NUM_CLASSES**2 < 256 else 256)

    # Get device environment
    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_prog)

    ckpt_dir = cfg.TEST.TEST_MODEL if not ckpt_dir else ckpt_dir

    if ckpt_dir is not None:
        print('load test model:', ckpt_dir)
        try:
            fluid.load(test_prog, os.path.join(ckpt_dir, 'model'), exe)
        except Exception:
            # Fall back to the legacy parameter loader for older checkpoints
            fluid.io.load_params(exe, ckpt_dir, main_program=test_prog)

    save_dir = vis_dir
    makedirs(save_dir)

    fetch_list = [pred.name]
    test_reader = dataset.batch(dataset.generator, batch_size=1, is_test=True)
    img_cnt = 0

    def exe_run():
        if cfg.DATASET.INPUT_IMAGE_NUM == 1:
            for imgs, grts, img_names, valid_shapes, org_shapes in test_reader:
                pred_shape = (imgs.shape[2], imgs.shape[3])
                pred, = exe.run(program=test_prog,
                                feed={'image1': imgs},
                                fetch_list=fetch_list,
                                return_numpy=True)
                yield pred, pred_shape, grts, img_names, valid_shapes, org_shapes
        else:
            for img1s, img2s, grts, img1_names, img2_names, valid_shapes, org_shapes in test_reader:
                pred_shape = (img1s.shape[2], img1s.shape[3])
                pred, = exe.run(program=test_prog,
                                feed={
                                    'image1': img1s,
                                    'image2': img2s
                                },
                                fetch_list=fetch_list,
                                return_numpy=True)
                yield pred, pred_shape, grts, img1_names, valid_shapes, org_shapes

    for pred, pred_shape, grts, img_names, valid_shapes, org_shapes in exe_run(
    ):
        idx = pred.shape[0]
        if cfg.DATASET.INPUT_IMAGE_NUM == 2 and cfg.VIS.SEG_FOR_CD:
            idx = pred.shape[0] // cfg.DATASET.INPUT_IMAGE_NUM
        pred1, pred2 = pred[:idx], pred[
            idx:]  # fluid.layers.split(pred, 2, dim=0)
        num_imgs = pred1.shape[0]
        # TODO: use multi-thread to write images
        for i in range(num_imgs):
            # Post-process each branch's prediction back to the original image size
            res_map_list = []
            for pred in [pred1, pred2]:
                if pred.shape[0] == 0:
                    continue
                #res_map = np.squeeze(pred[i, :, :, :]).astype(np.uint8)
                res_map = np.squeeze(pred[i, :, :, :]).astype(np.float32)
                res_shape = (res_map.shape[0], res_map.shape[1])
                if res_shape[0] != pred_shape[0] or res_shape[1] != pred_shape[
                        1]:
                    res_map = cv2.resize(res_map,
                                         pred_shape,
                                         interpolation=cv2.INTER_NEAREST)
                valid_shape = (valid_shapes[i, 0], valid_shapes[i, 1])
                res_map = res_map[0:valid_shape[0], 0:valid_shape[1]]
                org_shape = (org_shapes[i, 0], org_shapes[i, 1])
                res_map = cv2.resize(res_map, (org_shape[1], org_shape[0]),
                                     interpolation=cv2.INTER_NEAREST)
                res_map_list.append(res_map)

            img_name = img_names[i]
            png_fn = to_png_fn(img_name)

            # colorful segment result visualization
            vis_fn = os.path.join(save_dir, png_fn)
            dirname = os.path.dirname(vis_fn)
            makedirs(dirname)

            if cfg.DATASET.INPUT_IMAGE_NUM == 1 or \
                    (cfg.DATASET.INPUT_IMAGE_NUM == 2 and not cfg.VIS.SEG_FOR_CD):
                res_map = res_map_list[0]
                if cfg.VIS.RAW_PRED:
                    #pred_mask = PILImage.fromarray(res_map.astype(np.uint8), mode='L')
                    #pred_mask.save(vis_fn)
                    np.save(vis_fn.replace(".png", ".npy"),
                            res_map.astype(np.float32))
                else:
                    if cfg.VIS.ADD_LABEL:
                        grt_im = cv2.resize(grts[i],
                                            pred_shape,
                                            interpolation=cv2.INTER_NEAREST)
                        res_map = np.hstack((res_map, grt_im))
                    pred_mask = PILImage.fromarray(res_map.astype(np.uint8),
                                                   mode='P')
                    pred_mask.putpalette(color_map)
                    pred_mask.save(vis_fn)
            else:
                res_map1, res_map2 = res_map_list
                diff = res_map1 * cfg.DATASET.NUM_CLASSES + res_map2
                unchange_idx = np.where((res_map1 - res_map2) == 0)
                diff[unchange_idx] = 0
                res_map = np.hstack((res_map1, res_map2, diff))
                pred_mask = PILImage.fromarray(res_map.astype(np.uint8),
                                               mode='P')
                pred_mask.putpalette(color_map)
                pred_mask.save(vis_fn)

            img_cnt += 1
            print("#{} visualize image path: {}".format(img_cnt, vis_fn))

            # Use VisualDL to visualize image
            if log_writer is not None:
                # Calculate the epoch from the ckpt_dir folder name
                epoch = int(os.path.split(ckpt_dir)[-1])
                print("VisualDL visualization epoch", epoch)

                pred_mask_np = np.array(pred_mask.convert("RGB"))
                log_writer.add_image("Predict/{}".format(img_name),
                                     pred_mask_np, epoch)
                # Original image
                # BGR->RGB
                img = cv2.imread(os.path.join(cfg.DATASET.DATA_DIR,
                                              img_name))[..., ::-1]
                log_writer.add_image("Images/{}".format(img_name), img, epoch)
                # add ground truth (label) images
                grt = grts[i]
                if grt is not None:
                    grt = grt[0:valid_shape[0], 0:valid_shape[1]]
                    grt_pil = PILImage.fromarray(grt.astype(np.uint8),
                                                 mode='P')
                    grt_pil.putpalette(color_map)
                    grt_pil = grt_pil.resize((org_shape[1], org_shape[0]))
                    grt = np.array(grt_pil.convert("RGB"))
                    log_writer.add_image("Label/{}".format(img_name), grt,
                                         epoch)

        # In local_test mode, only visualize 5 images to sanity-check the
        # procedure
        if local_test and img_cnt >= 5:
            break
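
The get_color_map_list helper referenced above returns a flat [R0, G0, B0, R1, G1, B1, ...] palette suitable for PIL's putpalette. A minimal VOC-style sketch of the usual bit-interleaving scheme (an illustration, not necessarily the exact project utility):

def get_color_map_list(num_classes):
    # Class 0 stays black; each further class gets a distinct color derived
    # from the bits of its index, spread over the R, G and B channels.
    color_map = num_classes * [0, 0, 0]
    for i in range(num_classes):
        j, lab = 0, i
        while lab:
            color_map[i * 3 + 0] |= ((lab >> 0) & 1) << (7 - j)
            color_map[i * 3 + 1] |= ((lab >> 1) & 1) << (7 - j)
            color_map[i * 3 + 2] |= ((lab >> 2) & 1) << (7 - j)
            j += 1
            lab >>= 1
    return color_map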