# Example 1
def train(cfg):
    """Train a LaneNet model with the given config.

    Builds the fluid train program, wires up the (optionally multi-process)
    data pipeline, compiles the program for (multi-)GPU/CPU execution,
    resumes from a checkpoint or loads pretrained weights, then runs the
    epoch/step training loop with periodic logging, VisualDL scalars,
    snapshotting, evaluation and visualization.

    Args:
        cfg: AttrDict-style config object (DATASET/DATALOADER/SOLVER/TRAIN
            sections, BATCH_SIZE, NUM_TRAINERS, TRAINER_ID, ...).

    NOTE(review): relies on a module-level ``args`` namespace
    (use_mpio/use_gpu/use_vdl/debug/log_steps/do_eval/vdl_log_dir) — confirm
    it is populated by the script's argument parser before calling.
    """
    startup_prog = fluid.Program()
    train_prog = fluid.Program()
    drop_last = True

    # Training dataset; shuffling is enabled for TRAIN phase.
    dataset = LaneNetDataset(
        file_list=cfg.DATASET.TRAIN_FILE_LIST,
        mode=ModelPhase.TRAIN,
        shuffle=True,
        data_dir=cfg.DATASET.DATA_DIR)

    def data_generator():
        # Per-sample generator fed to the DataLoader. When multiple trainers
        # run, each trainer only yields its BATCH_SIZE // NUM_TRAINERS share
        # per "global batch", implicitly sharding the stream.
        if args.use_mpio:
            data_gen = dataset.multiprocess_generator(
                num_processes=cfg.DATALOADER.NUM_WORKERS,
                max_queue_size=cfg.DATALOADER.BUF_SIZE)
        else:
            data_gen = dataset.generator()

        batch_data = []
        for b in data_gen:
            batch_data.append(b)
            if len(batch_data) == (cfg.BATCH_SIZE // cfg.NUM_TRAINERS):
                for item in batch_data:
                    yield item
                batch_data = []

    # Get device environment
    gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
    place = fluid.CUDAPlace(gpu_id) if args.use_gpu else fluid.CPUPlace()
    places = fluid.cuda_places() if args.use_gpu else fluid.cpu_places()

    # Get number of GPU (in multi-trainer mode, NUM_TRAINERS overrides the
    # local device count).
    dev_count = cfg.NUM_TRAINERS if cfg.NUM_TRAINERS > 1 else len(places)
    print_info("#Device count: {}".format(dev_count))

    # Make sure BATCH_SIZE can divided by GPU cards
    assert cfg.BATCH_SIZE % dev_count == 0, (
        'BATCH_SIZE:{} not divisble by number of GPUs:{}'.format(
            cfg.BATCH_SIZE, dev_count))
    # If use multi-gpu training mode, batch data will allocated to each GPU evenly
    batch_size_per_dev = cfg.BATCH_SIZE // dev_count
    cfg.BATCH_SIZE_PER_DEV = batch_size_per_dev
    print_info("batch_size_per_dev: {}".format(batch_size_per_dev))

    # Build the training graph; returns the loader plus fetch targets
    # (losses, metrics, and debug tensors pred/grts/masks).
    data_loader, avg_loss, lr, pred, grts, masks, emb_loss, seg_loss, accuracy, fp, fn = build_model(
        train_prog, startup_prog, phase=ModelPhase.TRAIN)
    data_loader.set_sample_generator(
        data_generator, batch_size=batch_size_per_dev, drop_last=drop_last)

    exe = fluid.Executor(place)
    exe.run(startup_prog)

    exec_strategy = fluid.ExecutionStrategy()
    # Clear temporary variables every 100 iteration
    if args.use_gpu:
        exec_strategy.num_threads = fluid.core.get_cuda_device_count()
    exec_strategy.num_iteration_per_drop_scope = 100
    build_strategy = fluid.BuildStrategy()

    if cfg.NUM_TRAINERS > 1 and args.use_gpu:
        # Distributed (NCCL) setup; one exec thread per trainer process.
        dist_utils.prepare_for_multi_process(exe, build_strategy, train_prog)
        exec_strategy.num_threads = 1

    if cfg.TRAIN.SYNC_BATCH_NORM and args.use_gpu:
        if dev_count > 1:
            # Apply sync batch norm strategy
            print_info("Sync BatchNorm strategy is effective.")
            build_strategy.sync_batch_norm = True
        else:
            print_info(
                "Sync BatchNorm strategy will not be effective if GPU device"
                " count <= 1")
    compiled_train_prog = fluid.CompiledProgram(train_prog).with_data_parallel(
        loss_name=avg_loss.name,
        exec_strategy=exec_strategy,
        build_strategy=build_strategy)

    # Resume training
    begin_epoch = cfg.SOLVER.BEGIN_EPOCH
    if cfg.TRAIN.RESUME_MODEL_DIR:
        begin_epoch = load_checkpoint(exe, train_prog)
    # Load pretrained model
    elif os.path.exists(cfg.TRAIN.PRETRAINED_MODEL_DIR):
        load_pretrained_weights(exe, train_prog, cfg.TRAIN.PRETRAINED_MODEL_DIR)
    else:
        print_info(
            'Pretrained model dir {} not exists, training from scratch...'.
            format(cfg.TRAIN.PRETRAINED_MODEL_DIR))

    # fetch_list = [avg_loss.name, lr.name, accuracy.name, precision.name, recall.name]
    fetch_list = [
        avg_loss.name, lr.name, seg_loss.name, emb_loss.name, accuracy.name,
        fp.name, fn.name
    ]
    if args.debug:
        # Fetch more variable info and use streaming confusion matrix to
        # calculate IoU results if in debug mode
        np.set_printoptions(
            precision=4, suppress=True, linewidth=160, floatmode="fixed")
        fetch_list.extend([pred.name, grts.name, masks.name])
        # cm = ConfusionMatrix(cfg.DATASET.NUM_CLASSES, streaming=True)

    if args.use_vdl:
        if not args.vdl_log_dir:
            print_info("Please specify the log directory by --vdl_log_dir.")
            exit(1)

        from visualdl import LogWriter
        log_writer = LogWriter(args.vdl_log_dir)

    # trainer_id = int(os.getenv("PADDLE_TRAINER_ID", 0))
    # num_trainers = int(os.environ.get('PADDLE_TRAINERS_NUM', 1))
    # Estimate total step count for the ETA display.
    step = 0
    all_step = cfg.DATASET.TRAIN_TOTAL_IMAGES // cfg.BATCH_SIZE
    # NOTE(review): `drop_last != True` is a non-idiomatic spelling of
    # `not drop_last`; with drop_last hard-coded True above this branch
    # never fires.
    if cfg.DATASET.TRAIN_TOTAL_IMAGES % cfg.BATCH_SIZE and drop_last != True:
        all_step += 1
    all_step *= (cfg.SOLVER.NUM_EPOCHS - begin_epoch + 1)

    # Running sums over a log window; reset every `args.log_steps` steps.
    avg_loss = 0.0
    avg_seg_loss = 0.0
    avg_emb_loss = 0.0
    avg_acc = 0.0
    avg_fp = 0.0
    avg_fn = 0.0
    timer = Timer()
    timer.start()
    if begin_epoch > cfg.SOLVER.NUM_EPOCHS:
        raise ValueError(
            ("begin epoch[{}] is larger than cfg.SOLVER.NUM_EPOCHS[{}]").format(
                begin_epoch, cfg.SOLVER.NUM_EPOCHS))

    if args.use_mpio:
        print_info("Use multiprocess reader")
    else:
        print_info("Use multi-thread reader")

    for epoch in range(begin_epoch, cfg.SOLVER.NUM_EPOCHS + 1):
        data_loader.start()
        while True:
            try:
                # If not in debug mode, avoid unnessary log and calculate
                loss, lr, out_seg_loss, out_emb_loss, out_acc, out_fp, out_fn = exe.run(
                    program=compiled_train_prog,
                    fetch_list=fetch_list,
                    return_numpy=True)

                avg_loss += np.mean(np.array(loss))
                avg_seg_loss += np.mean(np.array(out_seg_loss))
                avg_emb_loss += np.mean(np.array(out_emb_loss))
                avg_acc += np.mean(out_acc)
                avg_fp += np.mean(out_fp)
                avg_fn += np.mean(out_fn)
                step += 1

                # Only trainer 0 logs, to avoid duplicated output in
                # multi-trainer runs.
                if step % args.log_steps == 0 and cfg.TRAINER_ID == 0:
                    avg_loss /= args.log_steps
                    avg_seg_loss /= args.log_steps
                    avg_emb_loss /= args.log_steps
                    avg_acc /= args.log_steps
                    avg_fp /= args.log_steps
                    avg_fn /= args.log_steps
                    speed = args.log_steps / timer.elapsed_time()
                    print((
                        "epoch={} step={} lr={:.5f} loss={:.4f} seg_loss={:.4f} emb_loss={:.4f} accuracy={:.4} fp={:.4} fn={:.4} step/sec={:.3f} | ETA {}"
                    ).format(epoch, step, lr[0], avg_loss, avg_seg_loss,
                             avg_emb_loss, avg_acc, avg_fp, avg_fn, speed,
                             calculate_eta(all_step - step, speed)))
                    if args.use_vdl:
                        log_writer.add_scalar('Train/loss', avg_loss, step)
                        log_writer.add_scalar('Train/lr', lr[0], step)
                        log_writer.add_scalar('Train/speed', speed, step)
                    sys.stdout.flush()
                    avg_loss = 0.0
                    avg_seg_loss = 0.0
                    avg_emb_loss = 0.0
                    avg_acc = 0.0
                    avg_fp = 0.0
                    avg_fn = 0.0
                    timer.restart()

            except fluid.core.EOFException:
                # Normal end of the epoch's data stream.
                data_loader.reset()
                break
            except Exception as e:
                # NOTE(review): broad catch that only prints and keeps
                # looping — a persistent error will spin here forever;
                # consider re-raising or breaking after repeated failures.
                print(e)

        if epoch % cfg.TRAIN.SNAPSHOT_EPOCH == 0 and cfg.TRAINER_ID == 0:
            ckpt_dir = save_checkpoint(exe, train_prog, epoch)

            if args.do_eval:
                print("Evaluation start")
                accuracy, fp, fn = evaluate(
                    cfg=cfg,
                    ckpt_dir=ckpt_dir,
                    use_gpu=args.use_gpu,
                    use_mpio=args.use_mpio)
                if args.use_vdl:
                    log_writer.add_scalar('Evaluate/accuracy', accuracy, step)
                    log_writer.add_scalar('Evaluate/fp', fp, step)
                    log_writer.add_scalar('Evaluate/fn', fn, step)

            # Use VisualDL to visualize results
            if args.use_vdl and cfg.DATASET.VIS_FILE_LIST is not None:
                visualize(
                    cfg=cfg,
                    use_gpu=args.use_gpu,
                    vis_file_list=cfg.DATASET.VIS_FILE_LIST,
                    vis_dir="visual",
                    ckpt_dir=ckpt_dir,
                    log_writer=log_writer)

    # save final model
    if cfg.TRAINER_ID == 0:
        save_checkpoint(exe, train_prog, 'final')
# Example 2
def evaluate(cfg, ckpt_dir=None, use_gpu=False, use_mpio=False, **kwargs):
    """Evaluate a LaneNet checkpoint on the validation set.

    Builds the EVAL program, streams the validation data through it, and
    accumulates image-weighted accuracy / false-positive / false-negative
    rates.

    Args:
        cfg: AttrDict-style config (DATASET/DATALOADER/TEST sections, BATCH_SIZE).
        ckpt_dir: checkpoint directory to load; falls back to cfg.TEST.TEST_MODEL.
        use_gpu: run on CUDA places when True, otherwise CPU.
        use_mpio: use the multiprocess data generator when True.
        **kwargs: ignored; kept for call-site compatibility.

    Returns:
        (accuracy, fp, fn): floats averaged over all evaluated images.
    """
    np.set_printoptions(precision=5, suppress=True)

    startup_prog = fluid.Program()
    test_prog = fluid.Program()

    # NOTE(review): the validation dataset is constructed with
    # mode=ModelPhase.TRAIN and shuffle=True — looks like it should be an
    # EVAL phase with shuffle=False; confirm against LaneNetDataset.
    dataset = LaneNetDataset(file_list=cfg.DATASET.VAL_FILE_LIST,
                             mode=ModelPhase.TRAIN,
                             shuffle=True,
                             data_dir=cfg.DATASET.DATA_DIR)

    def data_generator():
        #TODO: check is batch reader compatitable with Windows
        if use_mpio:
            data_gen = dataset.multiprocess_generator(
                num_processes=cfg.DATALOADER.NUM_WORKERS,
                max_queue_size=cfg.DATALOADER.BUF_SIZE)
        else:
            data_gen = dataset.generator()

        for b in data_gen:
            yield b

    # Build the evaluation graph; returns the loader plus fetch targets.
    data_loader, pred, grts, masks, accuracy, fp, fn = build_model(
        test_prog, startup_prog, phase=ModelPhase.EVAL)

    # drop_last=False so every validation image is evaluated.
    data_loader.set_sample_generator(data_generator,
                                     drop_last=False,
                                     batch_size=cfg.BATCH_SIZE)

    # Get device environment
    places = fluid.cuda_places() if use_gpu else fluid.cpu_places()
    place = places[0]
    dev_count = len(places)
    print("#Device count: {}".format(dev_count))

    exe = fluid.Executor(place)
    exe.run(startup_prog)

    # Freeze the graph for inference (disables in-place training-only ops).
    test_prog = test_prog.clone(for_test=True)

    ckpt_dir = cfg.TEST.TEST_MODEL if not ckpt_dir else ckpt_dir

    if ckpt_dir is not None:
        print('load test model:', ckpt_dir)
        fluid.io.load_params(exe, ckpt_dir, main_program=test_prog)

    # Use streaming confusion matrix to calculate mean_iou
    np.set_printoptions(precision=4,
                        suppress=True,
                        linewidth=160,
                        floatmode="fixed")
    fetch_list = [
        pred.name, grts.name, masks.name, accuracy.name, fp.name, fn.name
    ]
    num_images = 0
    step = 0
    # Sums weighted by the per-batch image count, so the final division by
    # num_images yields a per-image average even with a ragged last batch.
    avg_acc = 0.0
    avg_fp = 0.0
    avg_fn = 0.0
    # cur_images = 0
    all_step = cfg.DATASET.TEST_TOTAL_IMAGES // cfg.BATCH_SIZE + 1
    timer = Timer()
    timer.start()
    data_loader.start()
    while True:
        try:
            step += 1
            pred, grts, masks, out_acc, out_fp, out_fn = exe.run(
                test_prog, fetch_list=fetch_list, return_numpy=True)

            avg_acc += np.mean(out_acc) * pred.shape[0]
            avg_fp += np.mean(out_fp) * pred.shape[0]
            avg_fn += np.mean(out_fn) * pred.shape[0]
            num_images += pred.shape[0]

            speed = 1.0 / timer.elapsed_time()

            print(
                "[EVAL]step={} accuracy={:.4f} fp={:.4f} fn={:.4f} step/sec={:.2f} | ETA {}"
                .format(step, avg_acc / num_images, avg_fp / num_images,
                        avg_fn / num_images, speed,
                        calculate_eta(all_step - step, speed)))

            timer.restart()
            sys.stdout.flush()
        except fluid.core.EOFException:
            # End of validation stream.
            break

    # NOTE(review): raises ZeroDivisionError if the loader yielded no
    # batches (num_images == 0) — confirm the val list is never empty.
    print("[EVAL]#image={} accuracy={:.4f} fp={:.4f} fn={:.4f}".format(
        num_images, avg_acc / num_images, avg_fp / num_images,
        avg_fn / num_images))

    return avg_acc / num_images, avg_fp / num_images, avg_fn / num_images
# Example 3
def visualize(cfg,
              vis_file_list=None,
              use_gpu=False,
              vis_dir="visual",
              also_save_raw_results=False,
              ckpt_dir=None,
              log_writer=None,
              local_test=False,
              **kwargs):
    """Run LaneNet inference on a file list and save visualization images.

    For each input image, fetches the binary segmentation logits and the
    instance embedding, post-processes them into lane masks, displays the
    intermediate images with matplotlib, and writes the binary mask, the
    lane overlay, and the instance mask as PNGs under ``vis_dir``.

    Args:
        cfg: AttrDict-style config (DATASET/TEST sections).
        vis_file_list: file list to visualize; defaults to cfg.DATASET.TEST_FILE_LIST.
        use_gpu: run on CUDAPlace(0) when True, otherwise CPU.
        vis_dir: output root; results go to ``<vis_dir>/visual_results``.
        also_save_raw_results: when True, creates ``<vis_dir>/raw_results``.
            NOTE(review): the directory is created but nothing is written to
            it in this function — confirm intent.
        ckpt_dir: checkpoint to load; falls back to cfg.TEST.TEST_MODEL.
        log_writer: unused here; kept for call-site compatibility.
        local_test: unused here; kept for call-site compatibility.
        **kwargs: ignored.
    """
    if vis_file_list is None:
        vis_file_list = cfg.DATASET.TEST_FILE_LIST

    dataset = LaneNetDataset(file_list=vis_file_list,
                             mode=ModelPhase.VISUAL,
                             shuffle=True,
                             data_dir=cfg.DATASET.DATA_DIR)

    startup_prog = fluid.Program()
    test_prog = fluid.Program()
    pred, logit = build_model(test_prog, startup_prog, phase=ModelPhase.VISUAL)
    # Clone forward graph
    test_prog = test_prog.clone(for_test=True)

    # Get device environment
    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_prog)

    ckpt_dir = cfg.TEST.TEST_MODEL if not ckpt_dir else ckpt_dir

    fluid.io.load_params(exe, ckpt_dir, main_program=test_prog)

    save_dir = os.path.join(vis_dir, 'visual_results')
    makedirs(save_dir)
    if also_save_raw_results:
        raw_save_dir = os.path.join(vis_dir, 'raw_results')
        makedirs(raw_save_dir)

    fetch_list = [pred.name, logit.name]
    test_reader = dataset.batch(dataset.generator, batch_size=1, is_test=True)

    postprocessor = lanenet_postprocess.LaneNetPostProcessor()
    for imgs, grts, grts_instance, img_names, valid_shapes, org_imgs in test_reader:
        segLogits, emLogits = exe.run(program=test_prog,
                                      feed={'image': imgs},
                                      fetch_list=fetch_list,
                                      return_numpy=True)
        num_imgs = segLogits.shape[0]

        for i in range(num_imgs):
            gt_image = org_imgs[i]
            # Binary map: drop trailing channel dim; embedding: CHW -> HWC.
            binary_seg_image, instance_seg_image = segLogits[i].squeeze(
                -1), emLogits[i].transpose((1, 2, 0))

            postprocess_result = postprocessor.postprocess(
                binary_seg_result=binary_seg_image,
                instance_seg_result=instance_seg_image,
                source_image=gt_image)
            pred_binary_fn = os.path.join(
                save_dir, to_png_fn(img_names[i], name='_pred_binary'))
            pred_lane_fn = os.path.join(
                save_dir, to_png_fn(img_names[i], name='_pred_lane'))
            pred_instance_fn = os.path.join(
                save_dir, to_png_fn(img_names[i], name='_pred_instance'))
            dirname = os.path.dirname(pred_binary_fn)

            makedirs(dirname)
            mask_image = postprocess_result['mask_image']
            # Normalize each embedding channel to [0, 255] for display.
            # BUGFIX: the channel loop previously reused `i`, clobbering the
            # outer per-image index; use a distinct variable.
            for ch in range(4):
                instance_seg_image[:, :, ch] = minmax_scale(
                    instance_seg_image[:, :, ch])
            embedding_image = np.array(instance_seg_image).astype(np.uint8)

            # Interactive preview; channel order flipped BGR -> RGB for plt.
            # NOTE(review): plt.show() blocks once per image — confirm this
            # is intended outside of local debugging.
            plt.figure('mask_image')
            plt.imshow(mask_image[:, :, (2, 1, 0)])
            plt.figure('src_image')
            plt.imshow(gt_image[:, :, (2, 1, 0)])
            plt.figure('instance_image')
            plt.imshow(embedding_image[:, :, (2, 1, 0)])
            plt.figure('binary_image')
            plt.imshow(binary_seg_image * 255, cmap='gray')
            plt.show()

            # Persist results (cv2 expects BGR, which these already are).
            cv2.imwrite(pred_binary_fn,
                        np.array(binary_seg_image * 255).astype(np.uint8))
            cv2.imwrite(pred_lane_fn, postprocess_result['source_image'])
            cv2.imwrite(pred_instance_fn, mask_image)
            print(pred_lane_fn, 'saved!')