Example 1
def main():
    # Build model.
    model = model_builder.build_model(cfg=cfg)

    # Read checkpoint.
    ckpt = torch.load(
        cfg.MODEL.PATH2CKPT,
        map_location=torch.device("cpu")) if cfg.GENERAL.RESUME else {}

    if cfg.GENERAL.RESUME:
        with utils.log_info(msg="Load pre-trained model.",
                            level="INFO",
                            state=True):
            model.load_state_dict(ckpt["model"])
    # Set device.
    model, device = utils.set_device(model, cfg.GENERAL.GPU)

    try:
        test_data_loader = data_loader.build_data_loader(
            cfg, cfg.DATA.DATASET, "test")
        generate(cfg=cfg,
                 model=model,
                 data_loader=test_data_loader,
                 device=device)
    except Exception:
        utils.notify("Cannot build data loader for test set.", level="ERROR")
        raise ValueError("Cannot build data loader for test set.")
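A minimal, self-contained sketch of the same resume pattern in plain PyTorch, with a stand-in nn.Linear in place of the project-specific model_builder and utils helpers (the resume flag and checkpoint path here are hypothetical):

import torch
import torch.nn as nn

model = nn.Linear(4, 2)  # stand-in for model_builder.build_model(cfg=cfg)
resume, path2ckpt = False, "ckpt.pth"  # mirror cfg.GENERAL.RESUME / cfg.MODEL.PATH2CKPT
if resume:
    # Load to CPU first, then move the model to the target device.
    ckpt = torch.load(path2ckpt, map_location=torch.device("cpu"))
    model.load_state_dict(ckpt["model"])
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)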
Example 2
def create_model_ops(model, loss_scale):
    return model_builder.build_model(
        model=model,
        model_name=args.model_name,
        model_depth=args.model_depth,
        num_labels=args.num_labels,
        batch_size=args.batch_size,
        num_channels=args.num_channels,
        crop_size=args.crop_size,
        clip_length=(args.clip_length_of
                     if args.input_type else args.clip_length_rgb),
        loss_scale=loss_scale,
        pred_layer_name=args.pred_layer_name,
        multi_label=args.multi_label,
        channel_multiplier=args.channel_multiplier,
        bottleneck_multiplier=args.bottleneck_multiplier,
        use_dropout=args.use_dropout,
        conv1_temporal_stride=args.conv1_temporal_stride,
        conv1_temporal_kernel=args.conv1_temporal_kernel,
        use_pool1=args.use_pool1,
        audio_input_3d=args.audio_input_3d,
        g_blend=args.g_blend,
        audio_weight=args.audio_weight,
        visual_weight=args.visual_weight,
        av_weight=args.av_weight,
    )
Example 3
def export_inference_model(args):
    """
    Export PaddlePaddle inference model for prediction deployment and serving.
    """
    print("Exporting inference model...")
    startup_prog = fluid.Program()
    infer_prog = fluid.Program()
    image, logit_out = build_model(infer_prog,
                                   startup_prog,
                                   phase=ModelPhase.PREDICT)

    # Use CPU for exporting inference model instead of GPU
    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_prog)
    infer_prog = infer_prog.clone(for_test=True)

    if os.path.exists(cfg.TEST.TEST_MODEL):
        fluid.io.load_params(exe, cfg.TEST.TEST_MODEL, main_program=infer_prog)
    else:
        print("TEST.TEST_MODEL diretory is empty!")
        exit(-1)

    fluid.io.save_inference_model(cfg.FREEZE.SAVE_DIR,
                                  feeded_var_names=[image.name],
                                  target_vars=[logit_out],
                                  executor=exe,
                                  main_program=infer_prog,
                                  model_filename=cfg.FREEZE.MODEL_FILENAME,
                                  params_filename=cfg.FREEZE.PARAMS_FILENAME)
    print("Inference model exported!")
    print("Exporting inference model config...")
    deploy_cfg_path = export_inference_config()
    print("Inference model saved : [%s]" % (deploy_cfg_path))
Example 4
    def init_lanenet(self):
        '''
        Initialize the PaddlePaddle model.
        '''

        startup_prog = fluid.Program()
        test_prog = fluid.Program()
        self.pred, self.logit = build_model(test_prog,
                                            startup_prog,
                                            phase=ModelPhase.VISUAL)
        # Clone forward graph
        test_prog = test_prog.clone(for_test=True)

        # Get device environment
        place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
        self.exe = fluid.Executor(place)
        self.exe.run(startup_prog)

        ckpt_dir = self.weight_path
        if ckpt_dir is not None:
            print('load test model:', ckpt_dir)
            try:
                fluid.load(test_prog, os.path.join(ckpt_dir, 'model'),
                           self.exe)
            except Exception:
                # Fall back to the legacy parameter loader for older checkpoints.
                fluid.io.load_params(self.exe,
                                     ckpt_dir,
                                     main_program=test_prog)

        self.postprocessor = lanenet_postprocess.LaneNetPostProcessor()
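Inference would then run the cloned program through the executor. A sketch, assuming the graph exposes an 'image' feed as in the sibling examples; note that init_lanenet would also need to retain the program (e.g. self.test_prog = test_prog) for this to work:

# hypothetical single-batch inference with the handles built above
seg, emb = self.exe.run(self.test_prog,
                        feed={'image': image_batch},
                        fetch_list=[self.pred.name, self.logit.name],
                        return_numpy=True)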
Example 5
def create_model_ops(model, loss_scale):
    return model_builder.build_model(
        model=model,
        model_name=args.model_name,
        model_depth=args.model_depth,
        num_labels=args.num_labels,
        num_channels=args.num_channels,
        crop_size=args.crop_size,
        clip_length=(args.clip_length_of
                     if args.input_type else args.clip_length_rgb),
        loss_scale=loss_scale,
        pred_layer_name=args.pred_layer_name,
    )
Example 6
def export_inference_model(args):
    """
    Export PaddlePaddle inference model for prediction deployment and serving.
    """
    print("Exporting inference model...")
    startup_prog = fluid.Program()
    infer_prog = fluid.Program()
    image, logit_out = build_model(infer_prog,
                                   startup_prog,
                                   phase=ModelPhase.PREDICT)

    # Use CPU for exporting inference model instead of GPU
    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_prog)
    infer_prog = infer_prog.clone(for_test=True)
    not_quant_pattern_list = []
    if args.not_quant_pattern is not None:
        not_quant_pattern_list = args.not_quant_pattern

    config = {
        'weight_quantize_type': 'channel_wise_abs_max',
        'activation_quantize_type': 'moving_average_abs_max',
        'quantize_op_types': ['depthwise_conv2d', 'mul', 'conv2d'],
        'not_quant_pattern': not_quant_pattern_list
    }

    infer_prog = quant_aware(infer_prog, place, config, for_test=True)
    if os.path.exists(cfg.TEST.TEST_MODEL):
        fluid.io.load_persistables(exe,
                                   cfg.TEST.TEST_MODEL,
                                   main_program=infer_prog)
    else:
        print("TEST.TEST_MODEL diretory is empty!")
        exit(-1)

    infer_prog = convert(infer_prog, place, config)

    fluid.io.save_inference_model(cfg.FREEZE.SAVE_DIR,
                                  feeded_var_names=[image.name],
                                  target_vars=[logit_out],
                                  executor=exe,
                                  main_program=infer_prog,
                                  model_filename=cfg.FREEZE.MODEL_FILENAME,
                                  params_filename=cfg.FREEZE.PARAMS_FILENAME)
    print("Inference model exported!")
    print("Exporting inference model config...")
    deploy_cfg_path = export_inference_config()
    print("Inference model saved : [%s]" % (deploy_cfg_path))
Example 7
def start_training(EPOCHS, device, train_loader, test_loader, **models_dict):
    results = {}
    logger.info("\n**** Started training ****\n")
    for model_type in models_dict:
        #print(f"Model: {model_type}")
        logger.info(f"\nModel: {model_type}\n")
        train_accs, train_losses, test_acc, test_losses, best_model = build_model(
            EPOCHS, device, train_loader, test_loader,
            **models_dict[model_type])
        results[model_type] = [
            train_accs, train_losses, test_acc, test_losses, best_model
        ]
        #print(results)
        logger.info(f"\nresults : {results}\n")
        time.sleep(10)
    logger.info("\n**** Ended training ****\n")
    return results
Example 8
def export_serving_model(args):
    """
    Export PaddlePaddle model for prediction deployment and serving.
    """
    print("Exporting serving model...")
    startup_prog = fluid.Program()
    infer_prog = fluid.Program()
    image, logit_out = build_model(infer_prog,
                                   startup_prog,
                                   phase=ModelPhase.PREDICT)

    # Use CPU for exporting inference model instead of GPU
    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_prog)
    infer_prog = infer_prog.clone(for_test=True)

    if os.path.exists(cfg.TEST.TEST_MODEL):
        print('load test model:', cfg.TEST.TEST_MODEL)
        try:
            fluid.load(infer_prog, os.path.join(cfg.TEST.TEST_MODEL, 'model'),
                       exe)
        except Exception:
            # Fall back to the legacy parameter loader for older checkpoints.
            fluid.io.load_params(exe,
                                 cfg.TEST.TEST_MODEL,
                                 main_program=infer_prog)
    else:
        print("TEST.TEST_MODEL diretory is empty!")
        exit(-1)

    from paddle_serving_client.io import save_model
    save_model(
        cfg.FREEZE.SAVE_DIR + "/serving_server",
        cfg.FREEZE.SAVE_DIR + "/serving_client",
        {image.name: image},
        {logit_out.name: logit_out},
        infer_prog,
    )
    print("Serving model exported!")
    print("Exporting serving model config...")
    deploy_cfg_path = export_inference_config()
    print("Serving model saved : [%s]" % (deploy_cfg_path))
Example 9
def create_model_ops(model, loss_scale):
    return model_builder.build_model(
        model=model,
        model_name=args.model_name,
        model_depth=args.model_depth,
        num_labels=args.num_labels,
        batch_size=args.batch_size,
        num_channels=args.num_channels,
        crop_size=args.crop_size,
        clip_length=(args.clip_length_of
                     if args.input_type == 1 else args.clip_length_rgb),
        loss_scale=loss_scale,
        is_test=1,
        multi_label=args.multi_label,
        channel_multiplier=args.channel_multiplier,
        bottleneck_multiplier=args.bottleneck_multiplier,
        use_dropout=args.use_dropout,
        use_convolutional_pred=args.use_convolutional_pred,
        use_pool1=args.use_pool1,
    )
Example 10
def test(cfg):
    """
    Test a model
    """
    logging.setup_logging(logger, cfg)

    logger.info("Test with config")
    logger.info(pprint.pformat(cfg))

    model = model_builder.build_model(cfg)
    if du.is_master_proc():
        misc.log_model_info(model)

    if cfg.TEST.CHECKPOINT_FILE_PATH != "":
        logger.info("Load from given checkpoint file.")
        gs, checkpoint_epoch = cu.load_checkpoint(
            cfg.TEST.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            optimizer=None,
            inflation=False,
            convert_from_caffe2=False)
        start_epoch = checkpoint_epoch + 1
    elif cfg.TRAIN.AUTO_RESUME and cu.has_checkpoint(cfg.OUTPUT_DIR):
        logger.info("Load from last checkpoint.")
        last_checkpoint = cu.get_last_checkpoint(cfg.OUTPUT_DIR)
        gs, checkpoint_epoch = cu.load_checkpoint(last_checkpoint, model,
                                                  cfg.NUM_GPUS > 1, None)
        start_epoch = checkpoint_epoch + 1

    # Create the video test loader.
    test_loader = loader.construct_loader(cfg, "test")

    test_meter = TestMeter(cfg)

    if cfg.TEST.AUGMENT_TEST:
        evaluate_with_augmentation(test_loader, model, test_meter, cfg)
    else:
        evaluate(test_loader, model, test_meter, cfg)
Example 11
def load_model():
    startup_prog = fluid.Program()
    test_prog = fluid.Program()
    pred, logit = build_model(test_prog, startup_prog, phase=ModelPhase.VISUAL)
    fetch_list = [pred.name]
    # Clone forward graph
    test_prog = test_prog.clone(for_test=True)

    # Get device environment
    place = fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    exe.run(startup_prog)

    ckpt_dir = cfg.TEST.TEST_MODEL
    if ckpt_dir is not None:
        print('load test model:', ckpt_dir)
        try:
            fluid.load(test_prog, os.path.join(ckpt_dir, 'model'), exe)
        except Exception:
            # Fall back to the legacy parameter loader for older checkpoints.
            fluid.io.load_params(exe, ckpt_dir, main_program=test_prog)

    # # Get device environment
    # places = [fluid.CUDAPlace(i) for i in range(4)]
    # exes = [fluid.Executor(places[i]) for i in range(4)]
    # for exe in exes:
    #     exe.run(startup_prog)
    #
    # ckpt_dir = cfg.TEST.TEST_MODEL
    # if ckpt_dir is not None:
    #     print('load test model:', ckpt_dir)
    #     for i in range(4):
    #         try:
    #             fluid.load(test_prog, os.path.join(ckpt_dir, 'model'), exes[i])
    #         except:
    #             fluid.io.load_params(exes[i], ckpt_dir, main_program=test_prog)

    return fetch_list, test_prog, exe
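Hypothetical use of the returned handles; the 'image' feed key is an assumption carried over from the sibling visualize() functions:

fetch_list, test_prog, exe = load_model()
pred, = exe.run(test_prog,
                feed={'image': imgs},  # imgs: preprocessed NCHW batch
                fetch_list=fetch_list,
                return_numpy=True)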
Example 12
def train(cfg):
    """
    Train a video model for many epochs on train set and evaluate it on val set.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Setup logging format.
    logging.setup_logging(logger, cfg)

    # Print config.
    logger.info("Train with config:")
    logger.info(pprint.pformat(cfg))

    # Build the video model and print model statistics.
    model = model_builder.build_model(cfg)
    if du.is_master_proc():
        misc.log_model_info(model)

    # Construct the optimizer.
    optimizer = optim.construct_optimizer(model, cfg)

    # Record global step
    gs = 0

    # Load a checkpoint to resume training if applicable.
    if cfg.TRAIN.AUTO_RESUME and cu.has_checkpoint(cfg.OUTPUT_DIR):
        logger.info("Load from last checkpoint.")
        last_checkpoint = cu.get_last_checkpoint(cfg.OUTPUT_DIR)
        gs, checkpoint_epoch = cu.load_checkpoint(last_checkpoint, model,
                                                  cfg.NUM_GPUS > 1, optimizer)
        start_epoch = checkpoint_epoch + 1
    elif cfg.TRAIN.CHECKPOINT_FILE_PATH != "":
        logger.info("Load from given checkpoint file.")
        if cfg.TRAIN.LOAD_PART_OF_CHECKPOINT:
            gs, checkpoint_epoch = cu.load_part_of_checkpoint(
                cfg.TRAIN.CHECKPOINT_FILE_PATH,
                model,
                cfg.NUM_GPUS > 1,
                optimizer=None)
        else:
            gs, checkpoint_epoch = cu.load_checkpoint(
                cfg.TRAIN.CHECKPOINT_FILE_PATH,
                model,
                cfg.NUM_GPUS > 1,
                optimizer=None,
                inflation=False,
                convert_from_caffe2=False)
        start_epoch = checkpoint_epoch + 1
    else:
        gs = 0
        start_epoch = 0

    # Create the video train and val loaders.
    train_loader = loader.construct_loader(cfg, "train")
    val_loader = loader.construct_loader(cfg, "val")

    # Create meters.
    train_meter = TrainMeter(len(train_loader), cfg)
    val_meter = ValMeter(cfg)

    # Perform the training loop.
    logger.info("Start epoch: {} gs {}".format(start_epoch + 1, gs + 1))

    for cur_epoch in range(start_epoch, cfg.SOLVER.MAX_EPOCH):
        # Shuffle the dataset.
        loader.shuffle_dataset(train_loader, cur_epoch)

        # Evaluate the model on validation set.
        if misc.is_eval_epoch(cfg, cur_epoch):
            if cfg.TRAIN.USE_CENTER_VALIDATION:
                validation_epoch_center(val_loader, model, val_meter,
                                        cur_epoch, cfg)
            else:
                validation_epoch(val_loader, model, val_meter, cur_epoch, cfg)
        # Train for one epoch.
        gs = train_epoch(train_loader, model, optimizer, train_meter,
                         cur_epoch, gs, cfg)

        # Compute precise BN stats.
        # if cfg.BN.USE_PRECISE_STATS and len(get_bn_modules(model)) > 0:
        #     calculate_and_update_precise_bn(
        #         train_loader, model, cfg.BN.NUM_BATCHES_PRECISE
        #     )
        # Save a checkpoint.
        if cu.is_checkpoint_epoch(cur_epoch, cfg.TRAIN.CHECKPOINT_PERIOD):
            cu.save_checkpoint(cfg.OUTPUT_DIR, model, optimizer, cur_epoch, gs,
                               cfg)
Example 13
def main():
    # Set logger to record information.
    utils.check_env(cfg)
    logger = Logger(cfg)
    logger.log_info(cfg)
    metrics_handler = MetricsHandler(cfg.metrics)
    # utils.pack_code(cfg, logger=logger)

    # Build model.
    model = model_builder.build_model(cfg=cfg, logger=logger)
    optimizer = optimizer_helper.build_optimizer(cfg=cfg, model=model)
    lr_scheduler = lr_scheduler_helper.build_scheduler(cfg=cfg,
                                                       optimizer=optimizer)

    # Read checkpoint.
    ckpt = torch.load(cfg.model.path2ckpt) if cfg.gnrl.resume else {}
    if cfg.gnrl.resume:
        with logger.log_info(msg="Load pre-trained model.",
                             level="INFO",
                             state=True,
                             logger=logger):
            model.load_state_dict(ckpt["model"])
            optimizer.load_state_dict(ckpt["optimizer"])
            lr_scheduler.load_state_dict(ckpt["lr_scheduler"])

    # Set device.
    model, device = utils.set_pipline(
        model, cfg) if cfg.gnrl.PIPLINE else utils.set_device(
            model, cfg.gnrl.cuda)

    resume_epoch = ckpt["epoch"] if cfg.gnrl.resume else 0
    loss_fn = loss_fn_helper.build_loss_fn(cfg=cfg)

    # Prepare dataset.
    train_loaders, valid_loaders, test_loaders = dict(), dict(), dict()
    for dataset in cfg.data.datasets:
        if cfg.data[dataset].TRAIN:
            try:
                train_loaders[dataset] = data_loader.build_data_loader(
                    cfg, dataset, "train")
            except Exception:
                utils.notify(msg="Failed to build train loader of %s" %
                             dataset)
        if cfg.data[dataset].VALID:
            try:
                valid_loaders[dataset] = data_loader.build_data_loader(
                    cfg, dataset, "valid")
            except Exception:
                utils.notify(msg="Failed to build valid loader of %s" %
                             dataset)
        if cfg.data[dataset].TEST:
            try:
                test_loaders[dataset] = data_loader.build_data_loader(
                    cfg, dataset, "test")
            except Exception:
                utils.notify(msg="Failed to build test loader of %s" % dataset)

    # TODO Train, evaluate model and save checkpoint.
    for epoch in range(1, cfg.train.max_epoch + 1):
        if resume_epoch >= epoch:
            continue

        eval_kwargs = {
            "epoch": epoch,
            "cfg": cfg,
            "model": model,
            "loss_fn": loss_fn,
            "device": device,
            "metrics_handler": metrics_handler,
            "logger": logger,
            "save": cfg.save.save,
        }
        train_kwargs = {
            "epoch": epoch,
            "cfg": cfg,
            "model": model,
            "loss_fn": loss_fn,
            "optimizer": optimizer,
            "device": device,
            "lr_scheduler": lr_scheduler,
            "metrics_handler": metrics_handler,
            "logger": logger,
        }
        ckpt_kwargs = {
            "epoch": epoch,
            "cfg": cfg,
            "model": model.state_dict(),
            "metrics_handler": metrics_handler,
            "optimizer": optimizer.state_dict(),
            "lr_scheduler": lr_scheduler.state_dict(),
        }

        for dataset in cfg.data.datasets:
            if cfg.data[dataset].TRAIN:
                utils.notify("Train on %s" % dataset)
                train_one_epoch(data_loader=train_loaders[dataset],
                                **train_kwargs)

        utils.save_ckpt(path2file=cfg.model.path2ckpt, **ckpt_kwargs)

        if epoch in cfg.gnrl.ckphs:
            utils.save_ckpt(path2file=os.path.join(
                cfg.model.ckpts,
                cfg.gnrl.id + "_" + str(epoch).zfill(5) + ".pth"),
                            **ckpt_kwargs)
            for dataset in cfg.data.datasets:
                utils.notify("Evaluating test set of %s" % dataset,
                             logger=logger)
                if cfg.data[dataset].TEST:
                    evaluate(data_loader=test_loaders[dataset],
                             phase="test",
                             **eval_kwargs)

        for dataset in cfg.data.datasets:
            utils.notify("Evaluating valid set of %s" % dataset, logger=logger)
            if cfg.data[dataset].VALID:
                evaluate(data_loader=valid_loaders[dataset],
                         phase="valid",
                         **eval_kwargs)
    # End of train-valid for loop.

    eval_kwargs = {
        "epoch": epoch,
        "cfg": cfg,
        "model": model,
        "loss_fn": loss_fn,
        "device": device,
        "metrics_handler": metrics_handler,
        "logger": logger,
        "save": cfg.save.save,
    }

    for dataset in cfg.data.datasets:
        if cfg.data[dataset].VALID:
            utils.notify("Evaluating valid set of %s" % dataset, logger=logger)
            evaluate(data_loader=valid_loaders[dataset],
                     phase="valid",
                     **eval_kwargs)
    for dataset in cfg.data.datasets:
        if cfg.data[dataset].TEST:
            utils.notify("Evaluating test set of %s" % dataset, logger=logger)
            evaluate(data_loader=test_loaders[dataset],
                     phase="test",
                     **eval_kwargs)

    for dataset in cfg.data.datasets:
        if "train" in cfg.data[dataset].INFER:
            utils.notify("Inference on train set of %s" % dataset)
            inference(data_loader=train_loaders[dataset],
                      phase="infer_train",
                      **eval_kwargs)
        if "valid" in cfg.data[dataset].INFER:
            utils.notify("Inference on valid set of %s" % dataset)
            inference(data_loader=valid_loaders[dataset],
                      phase="infer_valid",
                      **eval_kwargs)
        if "test" in cfg.data[dataset].INFER:
            utils.notify("Inference on test set of %s" % dataset)
            inference(data_loader=test_loaders[dataset],
                      phase="infer_test",
                      **eval_kwargs)

    return None
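The resume branch and utils.save_ckpt above imply a plain dict checkpoint; a minimal round trip under that assumption, with stand-in torch objects:

import torch
import torch.nn as nn

model = nn.Linear(4, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10)
torch.save({"epoch": 1,
            "model": model.state_dict(),
            "optimizer": optimizer.state_dict(),
            "lr_scheduler": lr_scheduler.state_dict()}, "ckpt.pth")
ckpt = torch.load("ckpt.pth", map_location="cpu")
model.load_state_dict(ckpt["model"])  # mirrors the resume branch above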
Example 14
def visualize(cfg1,
              vis_file_list=None,
              use_gpu=False,
              vis_dir="visual",
              ckpt_dir=None,
              log_writer=None,
              local_test=False,
              **kwargs):
    if vis_file_list is None:
        vis_file_list = cfg1.DATASET.VIS_FILE_LIST
    dataset = SegDataset(
        file_list=vis_file_list,
        mode=ModelPhase.VISUAL,
        data_dir=cfg1.DATASET.DATA_DIR)

    startup_prog = fluid.Program()
    test_prog = fluid.Program()
    pred, logit, out = build_model(test_prog, startup_prog, phase=ModelPhase.VISUAL)
    # Clone forward graph
    test_prog = test_prog.clone(for_test=True)

    # Generate full colormap for maximum 256 classes
    color_map = get_color_map_list(256)

    # Get device environment
    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()

    exe = fluid.Executor(place)
    exe.run(startup_prog)
    ckpt_dir = cfg1.TEST.TEST_MODEL
    if ckpt_dir is not None:
        print('load test model:', ckpt_dir)
        try:
            fluid.load(test_prog, os.path.join(ckpt_dir, 'model'), exe)
        except Exception:
            fluid.io.load_params(exe, ckpt_dir, main_program=test_prog)
    save_dir = vis_dir
    makedirs(save_dir)

    fetch_list = [pred.name, out.name]
    test_reader = dataset.batch(dataset.generator, batch_size=1, is_test=True)
    img_cnt = 0
    for imgs, grts, img_names, valid_shapes, org_shapes in test_reader:
        pred_shape = (imgs.shape[2], imgs.shape[3])
        pred, out = exe.run(
            program=test_prog,
            feed={'image': imgs},
            fetch_list=fetch_list,
            return_numpy=True)

        out = np.array(out)
        pred = np.argmax(out, axis=3)
        pred = np.expand_dims(pred, -1)

        num_imgs = pred.shape[0]
        # TODO: use multi-thread to write images
        for i in range(num_imgs):
            # Squeeze the prediction into a 2-D uint8 label map.
            res_map = np.squeeze(pred[i, :, :, :]).astype(np.uint8)
            img_name = img_names[i]
            res_shape = (res_map.shape[0], res_map.shape[1])
            if res_shape[0] != pred_shape[0] or res_shape[1] != pred_shape[1]:
                res_map = cv2.resize(
                    res_map, pred_shape, interpolation=cv2.INTER_NEAREST)
            valid_shape = (valid_shapes[i, 0], valid_shapes[i, 1])
            res_map = res_map[0:valid_shape[0], 0:valid_shape[1]]
            org_shape = (org_shapes[i, 0], org_shapes[i, 1])
            res_map = cv2.resize(
                res_map, (org_shape[1], org_shape[0]),
                interpolation=cv2.INTER_NEAREST)

            png_fn = to_png_fn(img_name)
            # colorful segment result visualization
            vis_fn = os.path.join(save_dir, png_fn)
            dirname = os.path.dirname(vis_fn)
            makedirs(dirname)

            pred_mask = PILImage.fromarray(res_map.astype(np.uint8), mode='L')
            #pred_mask.putpalette(color_map)
            pred_mask.save(vis_fn)

            img_cnt += 1
            print("#{} visualize image path: {}".format(img_cnt, vis_fn))

            # Use VisualDL to visualize image
            if log_writer is not None:
                # Calculate epoch from ckpt_dir folder name
                epoch = int(os.path.split(ckpt_dir)[-1])
                print("VisualDL visualization epoch", epoch)

                pred_mask_np = np.array(pred_mask.convert("RGB"))
                log_writer.add_image("Predict/{}".format(img_name),
                                     pred_mask_np, epoch)
                # Original image
                # BGR->RGB
                img = cv2.imread(os.path.join(cfg1.DATASET.DATA_DIR,
                                              img_name))[..., ::-1]
                log_writer.add_image("Images/{}".format(img_name), img, epoch)
                # add ground truth (label) images
                grt = grts[i]
                if grt is not None:
                    grt = grt[0:valid_shape[0], 0:valid_shape[1]]
                    grt_pil = PILImage.fromarray(grt.astype(np.uint8), mode='P')
                    grt_pil.putpalette(color_map)
                    grt_pil = grt_pil.resize((org_shape[1], org_shape[0]))
                    # "256" is not a valid PIL mode; convert to RGB for VisualDL.
                    grt = np.array(grt_pil.convert("RGB"))
                    log_writer.add_image("Label/{}".format(img_name), grt,
                                         epoch)

        # If in local_test mode, only visualize 5 images just for testing
        # procedure
        if local_test and img_cnt >= 5:
            break
Example 15
def train(cfg):
    startup_prog = fluid.Program()
    train_prog = fluid.Program()
    test_prog = fluid.Program()
    if args.enable_ce:
        startup_prog.random_seed = 1000
        train_prog.random_seed = 1000
    drop_last = True

    dataset = SegDataset(
        file_list=cfg.DATASET.TRAIN_FILE_LIST,
        mode=ModelPhase.TRAIN,
        shuffle=True,
        data_dir=cfg.DATASET.DATA_DIR)

    def data_generator():
        if args.use_mpio:
            data_gen = dataset.multiprocess_generator(
                num_processes=cfg.DATALOADER.NUM_WORKERS,
                max_queue_size=cfg.DATALOADER.BUF_SIZE)
        else:
            data_gen = dataset.generator()

        batch_data = []
        for b in data_gen:
            batch_data.append(b)
            if len(batch_data) == (cfg.BATCH_SIZE // cfg.NUM_TRAINERS):
                for item in batch_data:
                    yield item[0], item[1], item[2]
                batch_data = []
        # If using the sync batch norm strategy, drop the last batch if the
        # number of samples in batch_data is less than cfg.BATCH_SIZE to
        # avoid NCCL hang issues
        if not cfg.TRAIN.SYNC_BATCH_NORM:
            for item in batch_data:
                yield item[0], item[1], item[2]

    # Get device environment
    gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
    place = fluid.CUDAPlace(gpu_id) if args.use_gpu else fluid.CPUPlace()
    places = fluid.cuda_places() if args.use_gpu else fluid.cpu_places()

    # Get number of GPU
    dev_count = cfg.NUM_TRAINERS if cfg.NUM_TRAINERS > 1 else len(places)
    print_info("#Device count: {}".format(dev_count))

    # Make sure BATCH_SIZE is divisible by the number of GPU cards
    assert cfg.BATCH_SIZE % dev_count == 0, (
        'BATCH_SIZE:{} not divisible by number of GPUs:{}'.format(
            cfg.BATCH_SIZE, dev_count))
    # In multi-GPU training mode, batch data is allocated to each GPU evenly
    batch_size_per_dev = cfg.BATCH_SIZE // dev_count
    print_info("batch_size_per_dev: {}".format(batch_size_per_dev))

    data_loader, avg_loss, lr, pred, grts, masks = build_model(
        train_prog, startup_prog, phase=ModelPhase.TRAIN)
    build_model(test_prog, fluid.Program(), phase=ModelPhase.EVAL)
    data_loader.set_sample_generator(
        data_generator, batch_size=batch_size_per_dev, drop_last=drop_last)

    exe = fluid.Executor(place)
    exe.run(startup_prog)

    exec_strategy = fluid.ExecutionStrategy()
    # Clear temporary variables every 100 iterations
    if args.use_gpu:
        exec_strategy.num_threads = fluid.core.get_cuda_device_count()
    exec_strategy.num_iteration_per_drop_scope = 100
    build_strategy = fluid.BuildStrategy()

    if cfg.NUM_TRAINERS > 1 and args.use_gpu:
        dist_utils.prepare_for_multi_process(exe, build_strategy, train_prog)
        exec_strategy.num_threads = 1

    if cfg.TRAIN.SYNC_BATCH_NORM and args.use_gpu:
        if dev_count > 1:
            # Apply sync batch norm strategy
            print_info("Sync BatchNorm strategy is effective.")
            build_strategy.sync_batch_norm = True
        else:
            print_info(
                "Sync BatchNorm strategy will not be effective if GPU device"
                " count <= 1")
    compiled_train_prog = fluid.CompiledProgram(train_prog).with_data_parallel(
        loss_name=avg_loss.name,
        exec_strategy=exec_strategy,
        build_strategy=build_strategy)

    # Resume training
    begin_epoch = cfg.SOLVER.BEGIN_EPOCH
    if cfg.TRAIN.RESUME_MODEL_DIR:
        begin_epoch = load_checkpoint(exe, train_prog)
    # Load pretrained model
    elif os.path.exists(cfg.TRAIN.PRETRAINED_MODEL_DIR):
        load_pretrained_weights(exe, train_prog, cfg.TRAIN.PRETRAINED_MODEL_DIR)
    else:
        print_info(
            'Pretrained model dir {} does not exist, training from scratch...'.
            format(cfg.TRAIN.PRETRAINED_MODEL_DIR))

    fetch_list = [avg_loss.name, lr.name]
    if args.debug:
        # Fetch more variable info and use streaming confusion matrix to
        # calculate IoU results if in debug mode
        np.set_printoptions(
            precision=4, suppress=True, linewidth=160, floatmode="fixed")
        fetch_list.extend([pred.name, grts.name, masks.name])
        cm = ConfusionMatrix(cfg.DATASET.NUM_CLASSES, streaming=True)

    if args.use_vdl:
        if not args.vdl_log_dir:
            print_info("Please specify the log directory by --vdl_log_dir.")
            exit(1)

        from visualdl import LogWriter
        log_writer = LogWriter(args.vdl_log_dir)

    # trainer_id = int(os.getenv("PADDLE_TRAINER_ID", 0))
    # num_trainers = int(os.environ.get('PADDLE_TRAINERS_NUM', 1))
    step = 0
    all_step = cfg.DATASET.TRAIN_TOTAL_IMAGES // cfg.BATCH_SIZE
    if cfg.DATASET.TRAIN_TOTAL_IMAGES % cfg.BATCH_SIZE and not drop_last:
        all_step += 1
    all_step *= (cfg.SOLVER.NUM_EPOCHS - begin_epoch + 1)

    avg_loss = 0.0
    best_mIoU = 0.0

    timer = Timer()
    timer.start()
    if begin_epoch > cfg.SOLVER.NUM_EPOCHS:
        raise ValueError(
            ("begin epoch[{}] is larger than cfg.SOLVER.NUM_EPOCHS[{}]").format(
                begin_epoch, cfg.SOLVER.NUM_EPOCHS))

    if args.use_mpio:
        print_info("Use multiprocess reader")
    else:
        print_info("Use multi-thread reader")

    for epoch in range(begin_epoch, cfg.SOLVER.NUM_EPOCHS + 1):
        data_loader.start()
        while True:
            try:
                if args.debug:
                    # Print category IoU and accuracy to check whether the
                    # training process meets expectations
                    loss, lr, pred, grts, masks = exe.run(
                        program=compiled_train_prog,
                        fetch_list=fetch_list,
                        return_numpy=True)
                    cm.calculate(pred, grts, masks)
                    avg_loss += np.mean(np.array(loss))
                    step += 1

                    if step % args.log_steps == 0:
                        speed = args.log_steps / timer.elapsed_time()
                        avg_loss /= args.log_steps
                        category_acc, mean_acc = cm.accuracy()
                        category_iou, mean_iou = cm.mean_iou()

                        print_info((
                            "epoch={} step={} lr={:.5f} loss={:.4f} acc={:.5f} mIoU={:.5f} step/sec={:.3f} | ETA {}"
                        ).format(epoch, step, lr[0], avg_loss, mean_acc,
                                 mean_iou, speed,
                                 calculate_eta(all_step - step, speed)))
                        print_info("Category IoU: ", category_iou)
                        print_info("Category Acc: ", category_acc)
                        if args.use_vdl:
                            log_writer.add_scalar('Train/mean_iou', mean_iou,
                                                  step)
                            log_writer.add_scalar('Train/mean_acc', mean_acc,
                                                  step)
                            log_writer.add_scalar('Train/loss', avg_loss, step)
                            log_writer.add_scalar('Train/lr', lr[0], step)
                            log_writer.add_scalar('Train/step/sec', speed, step)
                        sys.stdout.flush()
                        avg_loss = 0.0
                        cm.zero_matrix()
                        timer.restart()
                else:
                    # If not in debug mode, avoid unnecessary logging and computation
                    loss, lr = exe.run(
                        program=compiled_train_prog,
                        fetch_list=fetch_list,
                        return_numpy=True)
                    avg_loss += np.mean(np.array(loss))
                    step += 1

                    if step % args.log_steps == 0 and cfg.TRAINER_ID == 0:
                        avg_loss /= args.log_steps
                        speed = args.log_steps / timer.elapsed_time()
                        print((
                            "epoch={} step={} lr={:.5f} loss={:.4f} step/sec={:.3f} | ETA {}"
                        ).format(epoch, step, lr[0], avg_loss, speed,
                                 calculate_eta(all_step - step, speed)))
                        if args.use_vdl:
                            log_writer.add_scalar('Train/loss', avg_loss, step)
                            log_writer.add_scalar('Train/lr', lr[0], step)
                            log_writer.add_scalar('Train/speed', speed, step)
                        sys.stdout.flush()
                        avg_loss = 0.0
                        timer.restart()

                    # NOTE: used for benchmarking / profiler tools
                    if args.is_profiler and epoch == 1 and step == args.log_steps:
                        profiler.start_profiler("All")
                    elif args.is_profiler and epoch == 1 and step == args.log_steps + 5:
                        profiler.stop_profiler("total", args.profiler_path)
                        return

            except fluid.core.EOFException:
                data_loader.reset()
                break
            except Exception as e:
                print(e)

        if (epoch % cfg.TRAIN.SNAPSHOT_EPOCH == 0
                or epoch == cfg.SOLVER.NUM_EPOCHS) and cfg.TRAINER_ID == 0:
            ckpt_dir = save_checkpoint(train_prog, epoch)
            save_infer_program(test_prog, ckpt_dir)

            if args.do_eval:
                print("Evaluation start")
                _, mean_iou, _, mean_acc = evaluate(
                    cfg=cfg,
                    ckpt_dir=ckpt_dir,
                    use_gpu=args.use_gpu,
                    use_mpio=args.use_mpio)
                if args.use_vdl:
                    log_writer.add_scalar('Evaluate/mean_iou', mean_iou, step)
                    log_writer.add_scalar('Evaluate/mean_acc', mean_acc, step)

                if mean_iou > best_mIoU:
                    best_mIoU = mean_iou
                    update_best_model(ckpt_dir)
                    print_info("Save best model {} to {}, mIoU = {:.4f}".format(
                        ckpt_dir,
                        os.path.join(cfg.TRAIN.MODEL_SAVE_DIR, 'best_model'),
                        mean_iou))

            # Use VisualDL to visualize results
            if args.use_vdl and cfg.DATASET.VIS_FILE_LIST is not None:
                visualize(
                    cfg=cfg,
                    use_gpu=args.use_gpu,
                    vis_file_list=cfg.DATASET.VIS_FILE_LIST,
                    vis_dir="visual",
                    ckpt_dir=ckpt_dir,
                    log_writer=log_writer)

    # save final model
    if cfg.TRAINER_ID == 0:
        ckpt_dir = save_checkpoint(train_prog, 'final')
        save_infer_program(test_prog, ckpt_dir)
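calculate_eta is project-specific; a plausible minimal version consistent with how it is called above (remaining steps and steps per second in, a clock string out):

def calculate_eta(remaining_steps, steps_per_sec):
    # Guard against a zero rate on the very first measurement.
    seconds = int(remaining_steps / max(steps_per_sec, 1e-8))
    return "{:02d}:{:02d}:{:02d}".format(
        seconds // 3600, (seconds % 3600) // 60, seconds % 60)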
Example 16
def main(opt, device_id):
    opt = training_opt_postprocessing(opt, device_id)
    init_logger(opt.log_file)
    # Load checkpoint if we resume from a previous training.
    if opt.train_from:
        logger.info('Loading checkpoint from %s' % opt.train_from)
        checkpoint = torch.load(opt.train_from,
                                map_location=lambda storage, loc: storage)

        # Load default opt values, then overwrite them with opts from
        # the checkpoint. This is useful when re-training a model
        # after adding a new option (not set in the checkpoint).
        dummy_parser = configargparse.ArgumentParser()
        opts.model_opts(dummy_parser)
        default_opt = dummy_parser.parse_known_args([])[0]

        model_opt = default_opt
        model_opt.__dict__.update(checkpoint['opt'].__dict__)
    else:
        checkpoint = None
        model_opt = opt

    # Peek the first dataset to determine the data_type.
    # (All datasets have the same data_type).
    first_dataset = next(lazily_load_dataset("train", opt))
    data_type = first_dataset.data_type
    model_opt.input_size = first_dataset.examples[0].src.size()[0]

    # Load fields generated from preprocess phase.
    fields = load_fields(first_dataset, opt, checkpoint)

    # Report src/tgt features.

    src_features, tgt_features = _collect_report_features(fields)
    for j, feat in enumerate(src_features):
        logger.info(' * src feature %d size = %d'
                    % (j, len(fields[feat].vocab)))
    for j, feat in enumerate(tgt_features):
        logger.info(' * tgt feature %d size = %d'
                    % (j, len(fields[feat].vocab)))

    # Build model.
    model = build_model(model_opt, opt, fields, checkpoint)
    n_params, enc, dec = _tally_parameters(model)
    logger.info('encoder: %d' % enc)
    logger.info('decoder: %d' % dec)
    logger.info('* number of parameters: %d' % n_params)
    _check_save_model_path(opt)

    # Build optimizer.
    optim = build_optim(model, opt, checkpoint)

    # Build model saver
    model_saver = build_model_saver(model_opt, opt, model, fields, optim)

    trainer = build_trainer(opt, device_id, model, fields,
                            optim, data_type, model_saver=model_saver)

    def train_iter_fct(): return build_dataset_iter(
        lazily_load_dataset("train", opt), fields, opt)

    def valid_iter_fct(): return build_dataset_iter(
        lazily_load_dataset("valid", opt), fields, opt, is_train=False)

    # Do training.
    if len(opt.gpu_ranks):
        logger.info('Starting training on GPU: %s' % opt.gpu_ranks)
    else:
        logger.info('Starting training on CPU, could be very slow')
    trainer.train(train_iter_fct, valid_iter_fct, opt.train_steps,
                  opt.valid_steps)

    if opt.tensorboard:
        trainer.report_manager.tensorboard_writer.close()
Example 17
def visualize(cfg,
              vis_file_list=None,
              use_gpu=False,
              vis_dir="visual",
              also_save_raw_results=False,
              ckpt_dir=None,
              log_writer=None,
              local_test=False,
              **kwargs):
    if vis_file_list is None:
        vis_file_list = cfg.DATASET.TEST_FILE_LIST

    dataset = LaneNetDataset(file_list=vis_file_list,
                             mode=ModelPhase.VISUAL,
                             shuffle=True,
                             data_dir=cfg.DATASET.DATA_DIR)

    startup_prog = fluid.Program()
    test_prog = fluid.Program()
    pred, logit = build_model(test_prog, startup_prog, phase=ModelPhase.VISUAL)
    # Clone forward graph
    test_prog = test_prog.clone(for_test=True)

    # Get device environment
    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_prog)

    ckpt_dir = cfg.TEST.TEST_MODEL if not ckpt_dir else ckpt_dir

    fluid.io.load_params(exe, ckpt_dir, main_program=test_prog)

    save_dir = os.path.join(vis_dir, 'visual_results')
    makedirs(save_dir)
    if also_save_raw_results:
        raw_save_dir = os.path.join(vis_dir, 'raw_results')
        makedirs(raw_save_dir)

    fetch_list = [pred.name, logit.name]
    test_reader = dataset.batch(dataset.generator, batch_size=1, is_test=True)

    postprocessor = lanenet_postprocess.LaneNetPostProcessor()
    for imgs, grts, grts_instance, img_names, valid_shapes, org_imgs in test_reader:
        segLogits, emLogits = exe.run(program=test_prog,
                                      feed={'image': imgs},
                                      fetch_list=fetch_list,
                                      return_numpy=True)
        num_imgs = segLogits.shape[0]

        for i in range(num_imgs):
            gt_image = org_imgs[i]
            binary_seg_image, instance_seg_image = segLogits[i].squeeze(
                -1), emLogits[i].transpose((1, 2, 0))

            postprocess_result = postprocessor.postprocess(
                binary_seg_result=binary_seg_image,
                instance_seg_result=instance_seg_image,
                source_image=gt_image)
            pred_binary_fn = os.path.join(
                save_dir, to_png_fn(img_names[i], name='_pred_binary'))
            pred_lane_fn = os.path.join(
                save_dir, to_png_fn(img_names[i], name='_pred_lane'))
            pred_instance_fn = os.path.join(
                save_dir, to_png_fn(img_names[i], name='_pred_instance'))
            dirname = os.path.dirname(pred_binary_fn)

            makedirs(dirname)
            mask_image = postprocess_result['mask_image']
            for c in range(4):
                instance_seg_image[:, :, c] = minmax_scale(
                    instance_seg_image[:, :, c])
            embedding_image = np.array(instance_seg_image).astype(np.uint8)

            plt.figure('mask_image')
            plt.imshow(mask_image[:, :, (2, 1, 0)])
            plt.figure('src_image')
            plt.imshow(gt_image[:, :, (2, 1, 0)])
            plt.figure('instance_image')
            plt.imshow(embedding_image[:, :, (2, 1, 0)])
            plt.figure('binary_image')
            plt.imshow(binary_seg_image * 255, cmap='gray')
            plt.show()

            cv2.imwrite(pred_binary_fn,
                        np.array(binary_seg_image * 255).astype(np.uint8))
            cv2.imwrite(pred_lane_fn, postprocess_result['source_image'])
            cv2.imwrite(pred_instance_fn, mask_image)
            print(pred_lane_fn, 'saved!')
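minmax_scale is imported elsewhere in this source; since its result is cast to uint8 above, a plausible per-channel implementation stretches each channel to the 0-255 range:

import numpy as np

def minmax_scale(x):
    # Hypothetical reimplementation: stretch one channel to [0, 255].
    span = max(float(x.max() - x.min()), 1e-8)
    return (x - x.min()) * 255.0 / span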
Example 18
def visualize(cfg,
              vis_file_list=None,
              use_gpu=False,
              vis_dir="show",
              ckpt_dir=None,
              log_writer=None,
              local_test=False,
              **kwargs):
    if vis_file_list is None:
        vis_file_list = cfg.DATASET.VIS_FILE_LIST
    dataset = SegDataset(file_list=vis_file_list,
                         mode=ModelPhase.VISUAL,
                         data_dir=cfg.DATASET.DATA_DIR)

    startup_prog = fluid.Program()
    test_prog = fluid.Program()
    pred, logit, out = build_model(test_prog,
                                   startup_prog,
                                   phase=ModelPhase.VISUAL)
    # Clone forward graph
    test_prog = test_prog.clone(for_test=True)

    # Generate full colormap for maximum 256 classes
    color_map = get_color_map_list(256)

    # Get device environment
    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_prog)

    ckpt_dir = cfg.TEST.TEST_MODEL if not ckpt_dir else ckpt_dir

    if ckpt_dir is not None:
        print('load test model:', ckpt_dir)
        try:
            fluid.load(test_prog, os.path.join(ckpt_dir, 'model'), exe)
        except Exception:
            fluid.io.load_params(exe, ckpt_dir, main_program=test_prog)

    save_dir = "show"
    makedirs(save_dir)

    fetch_list = [pred.name, logit.name]
    test_reader = dataset.batch(dataset.generator, batch_size=1, is_test=True)
    img_cnt = 0
    for imgs, grts, img_names, valid_shapes, org_shapes in test_reader:
        pred_shape = (imgs.shape[2], imgs.shape[3])
        pred, logit = exe.run(program=test_prog,
                              feed={'image': imgs},
                              fetch_list=fetch_list,
                              return_numpy=True)

        num_imgs = pred.shape[0]
        # TODO: use multi-thread to write images
        for i in range(num_imgs):
            # Squeeze the prediction into a 2-D uint8 label map.
            res_map = np.squeeze(pred[i, :, :, :]).astype(np.uint8)
            img_name = img_names[i]
            res_shape = (res_map.shape[0], res_map.shape[1])
            if res_shape[0] != pred_shape[0] or res_shape[1] != pred_shape[1]:
                res_map = cv2.resize(res_map,
                                     pred_shape,
                                     interpolation=cv2.INTER_NEAREST)
            valid_shape = (valid_shapes[i, 0], valid_shapes[i, 1])
            res_map = res_map[0:valid_shape[0], 0:valid_shape[1]]
            org_shape = (org_shapes[i, 0], org_shapes[i, 1])
            res_map = cv2.resize(res_map, (org_shape[1], org_shape[0]),
                                 interpolation=cv2.INTER_NEAREST)

            png_fn = to_png_fn(img_name)
            # colorful segment result visualization
            vis_fn = os.path.join(save_dir, png_fn)
            dirname = os.path.dirname(vis_fn)
            makedirs(dirname)

            pred_mask = PILImage.fromarray(res_map.astype(np.uint8), mode='L')
            pred_mask.putpalette(color_map)
            # pred_mask.save(vis_fn)

            pred_mask_np = np.array(pred_mask.convert("RGB"))
            im_pred = PILImage.fromarray(pred_mask_np)

            # Original image
            # BGR->RGB
            img = cv2.imread(os.path.join(cfg.DATASET.DATA_DIR,
                                          img_name))[..., ::-1]
            im_ori = PILImage.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
            # log_writer.add_image("Images/{}".format(img_name), img, epoch)
            # add ground truth (label) images

            im_pred_cat = PILImage.blend(im_ori, im_pred, 0.5)
            im_ori = join(im_ori, im_ori, flag="vertical")
            im_pred_cat = join(im_pred_cat, im_pred, flag="vertical")
            new_img = join(im_ori, im_pred_cat)
            new_img.save(vis_fn)

            img_cnt += 1
            print("#{} show image path: {}".format(img_cnt, vis_fn))
Example 19
def evaluate(cfg, ckpt_dir=None, use_gpu=False, use_mpio=False, **kwargs):
    np.set_printoptions(precision=5, suppress=True)

    startup_prog = fluid.Program()
    test_prog = fluid.Program()
    dataset = SegDataset(file_list=cfg.DATASET.VAL_FILE_LIST,
                         mode=ModelPhase.EVAL,
                         data_dir=cfg.DATASET.DATA_DIR)

    def data_generator():
        # TODO: check whether the batch reader is compatible with Windows
        if use_mpio:
            data_gen = dataset.multiprocess_generator(
                num_processes=cfg.DATALOADER.NUM_WORKERS,
                max_queue_size=cfg.DATALOADER.BUF_SIZE)
        else:
            data_gen = dataset.generator()

        for b in data_gen:
            yield b[0], b[1], b[2]

    data_loader, avg_loss, pred, grts, masks = build_model(
        test_prog, startup_prog, phase=ModelPhase.EVAL)

    data_loader.set_sample_generator(data_generator,
                                     drop_last=False,
                                     batch_size=cfg.BATCH_SIZE)

    # Get device environment
    places = fluid.cuda_places() if use_gpu else fluid.cpu_places()
    place = places[0]
    dev_count = len(places)
    print("#Device count: {}".format(dev_count))

    exe = fluid.Executor(place)
    exe.run(startup_prog)

    test_prog = test_prog.clone(for_test=True)

    ckpt_dir = cfg.TEST.TEST_MODEL if not ckpt_dir else ckpt_dir

    if not os.path.exists(ckpt_dir):
        raise ValueError(
            'The TEST.TEST_MODEL {} is not found'.format(ckpt_dir))

    if ckpt_dir is not None:
        print('load test model:', ckpt_dir)
        load_model(exe, test_prog, ckpt_dir)

    # Use streaming confusion matrix to calculate mean_iou
    np.set_printoptions(precision=4,
                        suppress=True,
                        linewidth=160,
                        floatmode="fixed")
    conf_mat = ConfusionMatrix(cfg.DATASET.NUM_CLASSES, streaming=True)
    fetch_list = [avg_loss.name, pred.name, grts.name, masks.name]
    num_images = 0
    step = 0
    all_step = cfg.DATASET.TEST_TOTAL_IMAGES // cfg.BATCH_SIZE + 1
    timer = Timer()
    timer.start()
    data_loader.start()
    while True:
        try:
            step += 1
            loss, pred, grts, masks = exe.run(test_prog,
                                              fetch_list=fetch_list,
                                              return_numpy=True)

            loss = np.mean(np.array(loss))

            num_images += pred.shape[0]
            conf_mat.calculate(pred, grts, masks)
            _, iou = conf_mat.mean_iou()
            _, acc = conf_mat.accuracy()

            speed = 1.0 / timer.elapsed_time()

            print(
                "[EVAL]step={} loss={:.5f} acc={:.4f} IoU={:.4f} step/sec={:.2f} | ETA {}"
                .format(step, loss, acc, iou, speed,
                        calculate_eta(all_step - step, speed)))
            timer.restart()
            sys.stdout.flush()
        except fluid.core.EOFException:
            break

    category_iou, avg_iou = conf_mat.mean_iou()
    category_acc, avg_acc = conf_mat.accuracy()
    print("[EVAL]#image={} acc={:.4f} IoU={:.4f}".format(
        num_images, avg_acc, avg_iou))
    print("[EVAL]Category IoU:", category_iou)
    print("[EVAL]Category Acc:", category_acc)
    print("[EVAL]Kappa:{:.4f}".format(conf_mat.kappa()))

    return category_iou, avg_iou, category_acc, avg_acc
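The streaming ConfusionMatrix helper is project-specific, but the mean-IoU arithmetic it performs is standard; a tiny numpy illustration on a toy 2-class matrix:

import numpy as np

cm = np.array([[50., 2.], [3., 45.]])  # rows: ground truth, cols: prediction
intersection = np.diag(cm)
union = cm.sum(axis=0) + cm.sum(axis=1) - intersection
iou = intersection / np.maximum(union, 1e-8)
print("category IoU:", iou, "mIoU:", iou.mean())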
Example 20
def evaluate(cfg, ckpt_dir=None, use_gpu=False, use_mpio=False, **kwargs):
    np.set_printoptions(precision=5, suppress=True)

    startup_prog = fluid.Program()
    test_prog = fluid.Program()

    dataset = LaneNetDataset(file_list=cfg.DATASET.VAL_FILE_LIST,
                             mode=ModelPhase.TRAIN,
                             shuffle=True,
                             data_dir=cfg.DATASET.DATA_DIR)

    def data_generator():
        # TODO: check whether the batch reader is compatible with Windows
        if use_mpio:
            data_gen = dataset.multiprocess_generator(
                num_processes=cfg.DATALOADER.NUM_WORKERS,
                max_queue_size=cfg.DATALOADER.BUF_SIZE)
        else:
            data_gen = dataset.generator()

        for b in data_gen:
            yield b

    data_loader, pred, grts, masks, accuracy, fp, fn = build_model(
        test_prog, startup_prog, phase=ModelPhase.EVAL)

    data_loader.set_sample_generator(data_generator,
                                     drop_last=False,
                                     batch_size=cfg.BATCH_SIZE)

    # Get device environment
    places = fluid.cuda_places() if use_gpu else fluid.cpu_places()
    place = places[0]
    dev_count = len(places)
    print("#Device count: {}".format(dev_count))

    exe = fluid.Executor(place)
    exe.run(startup_prog)

    test_prog = test_prog.clone(for_test=True)

    ckpt_dir = cfg.TEST.TEST_MODEL if not ckpt_dir else ckpt_dir

    if ckpt_dir is not None:
        print('load test model:', ckpt_dir)
        fluid.io.load_params(exe, ckpt_dir, main_program=test_prog)

    # Use streaming confusion matrix to calculate mean_iou
    np.set_printoptions(precision=4,
                        suppress=True,
                        linewidth=160,
                        floatmode="fixed")
    fetch_list = [
        pred.name, grts.name, masks.name, accuracy.name, fp.name, fn.name
    ]
    num_images = 0
    step = 0
    avg_acc = 0.0
    avg_fp = 0.0
    avg_fn = 0.0
    # cur_images = 0
    all_step = cfg.DATASET.TEST_TOTAL_IMAGES // cfg.BATCH_SIZE + 1
    timer = Timer()
    timer.start()
    data_loader.start()
    while True:
        try:
            step += 1
            pred, grts, masks, out_acc, out_fp, out_fn = exe.run(
                test_prog, fetch_list=fetch_list, return_numpy=True)

            avg_acc += np.mean(out_acc) * pred.shape[0]
            avg_fp += np.mean(out_fp) * pred.shape[0]
            avg_fn += np.mean(out_fn) * pred.shape[0]
            num_images += pred.shape[0]

            speed = 1.0 / timer.elapsed_time()

            print(
                "[EVAL]step={} accuracy={:.4f} fp={:.4f} fn={:.4f} step/sec={:.2f} | ETA {}"
                .format(step, avg_acc / num_images, avg_fp / num_images,
                        avg_fn / num_images, speed,
                        calculate_eta(all_step - step, speed)))

            timer.restart()
            sys.stdout.flush()
        except fluid.core.EOFException:
            break

    print("[EVAL]#image={} accuracy={:.4f} fp={:.4f} fn={:.4f}".format(
        num_images, avg_acc / num_images, avg_fp / num_images,
        avg_fn / num_images))

    return avg_acc / num_images, avg_fp / num_images, avg_fn / num_images
Example 21
def test_model():
    # from configs.configs import cfg
    model = model_builder.build_model(cfg=cfg, logger=None)
    inp = torch.randn((2, 6, 512, 512))
    out = model(inp)
    print(model)
Example 22
def train(cfg):
    startup_prog = fluid.Program()
    train_prog = fluid.Program()
    drop_last = True

    dataset = SegDataset(file_list=cfg.DATASET.TRAIN_FILE_LIST,
                         mode=ModelPhase.TRAIN,
                         shuffle=True,
                         data_dir=cfg.DATASET.DATA_DIR)

    def data_generator():
        if args.use_mpio:
            data_gen = dataset.multiprocess_generator(
                num_processes=cfg.DATALOADER.NUM_WORKERS,
                max_queue_size=cfg.DATALOADER.BUF_SIZE)
        else:
            data_gen = dataset.generator()

        batch_data = []
        for b in data_gen:
            batch_data.append(b)
            if len(batch_data) == (cfg.BATCH_SIZE // cfg.NUM_TRAINERS):
                for item in batch_data:
                    yield item[0], item[1], item[2]
                batch_data = []
        # If using the sync batch norm strategy, drop the last batch if the
        # number of samples in batch_data is less than cfg.BATCH_SIZE to
        # avoid NCCL hang issues
        if not cfg.TRAIN.SYNC_BATCH_NORM:
            for item in batch_data:
                yield item[0], item[1], item[2]

    # Get device environment
    # places = fluid.cuda_places() if args.use_gpu else fluid.cpu_places()
    # place = places[0]
    gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
    place = fluid.CUDAPlace(gpu_id) if args.use_gpu else fluid.CPUPlace()
    places = fluid.cuda_places() if args.use_gpu else fluid.cpu_places()

    # Get number of GPU
    dev_count = cfg.NUM_TRAINERS if cfg.NUM_TRAINERS > 1 else len(places)
    print_info("#Device count: {}".format(dev_count))

    # Make sure BATCH_SIZE is divisible by the number of GPU cards
    assert cfg.BATCH_SIZE % dev_count == 0, (
        'BATCH_SIZE:{} not divisible by number of GPUs:{}'.format(
            cfg.BATCH_SIZE, dev_count))
    # In multi-GPU training mode, batch data is allocated to each GPU evenly
    batch_size_per_dev = cfg.BATCH_SIZE // dev_count
    print_info("batch_size_per_dev: {}".format(batch_size_per_dev))

    py_reader, avg_loss, lr, pred, grts, masks = build_model(
        train_prog, startup_prog, phase=ModelPhase.TRAIN)
    py_reader.decorate_sample_generator(data_generator,
                                        batch_size=batch_size_per_dev,
                                        drop_last=drop_last)

    exe = fluid.Executor(place)
    exe.run(startup_prog)

    exec_strategy = fluid.ExecutionStrategy()
    if args.use_gpu:
        exec_strategy.num_threads = fluid.core.get_cuda_device_count()
    # Clear temporary variables every 100 iterations
    exec_strategy.num_iteration_per_drop_scope = 100
    build_strategy = fluid.BuildStrategy()

    if cfg.NUM_TRAINERS > 1 and args.use_gpu:
        dist_utils.prepare_for_multi_process(exe, build_strategy, train_prog)
        exec_strategy.num_threads = 1

    if cfg.TRAIN.SYNC_BATCH_NORM and args.use_gpu:
        if dev_count > 1:
            # Apply sync batch norm strategy
            print_info("Sync BatchNorm strategy is effective.")
            build_strategy.sync_batch_norm = True
        else:
            print_info(
                "Sync BatchNorm strategy will not be effective if GPU device"
                " count <= 1")
    compiled_train_prog = fluid.CompiledProgram(train_prog).with_data_parallel(
        loss_name=avg_loss.name,
        exec_strategy=exec_strategy,
        build_strategy=build_strategy)

    # Resume training
    begin_epoch = cfg.SOLVER.BEGIN_EPOCH
    if cfg.TRAIN.RESUME_MODEL_DIR:
        begin_epoch = load_checkpoint(exe, train_prog)
    # Load pretrained model
    elif os.path.exists(cfg.TRAIN.PRETRAINED_MODEL_DIR):
        print_info('Pretrained model dir: ', cfg.TRAIN.PRETRAINED_MODEL_DIR)
        load_vars = []
        load_fail_vars = []

        def var_shape_matched(var, shape):
            """
            Check whehter persitable variable shape is match with current network
            """
            var_exist = os.path.exists(
                os.path.join(cfg.TRAIN.PRETRAINED_MODEL_DIR, var.name))
            if var_exist:
                var_shape = parse_shape_from_file(
                    os.path.join(cfg.TRAIN.PRETRAINED_MODEL_DIR, var.name))
                return var_shape == shape
            return False

        for x in train_prog.list_vars():
            if isinstance(x, fluid.framework.Parameter):
                shape = tuple(fluid.global_scope().find_var(
                    x.name).get_tensor().shape())
                if var_shape_matched(x, shape):
                    load_vars.append(x)
                else:
                    load_fail_vars.append(x)

        fluid.io.load_vars(exe,
                           dirname=cfg.TRAIN.PRETRAINED_MODEL_DIR,
                           vars=load_vars)
        for var in load_vars:
            print_info("Parameter[{}] loaded sucessfully!".format(var.name))
        for var in load_fail_vars:
            print_info(
                "Parameter[{}] doesn't exist or its shape does not match the"
                " current network, skip loading it.".format(var.name))
        print_info("{}/{} pretrained parameters loaded successfully!".format(
            len(load_vars),
            len(load_vars) + len(load_fail_vars)))
    else:
        print_info(
            'Pretrained model dir {} does not exist, training from scratch...'.
            format(cfg.TRAIN.PRETRAINED_MODEL_DIR))

    fetch_list = [avg_loss.name, lr.name]
    if args.debug:
        # Fetch more variable info and use streaming confusion matrix to
        # calculate IoU results if in debug mode
        np.set_printoptions(precision=4,
                            suppress=True,
                            linewidth=160,
                            floatmode="fixed")
        fetch_list.extend([pred.name, grts.name, masks.name])
        cm = ConfusionMatrix(cfg.DATASET.NUM_CLASSES, streaming=True)

    if args.use_tb:
        if not args.tb_log_dir:
            print_info("Please specify the log directory by --tb_log_dir.")
            exit(1)

        from tb_paddle import SummaryWriter
        log_writer = SummaryWriter(args.tb_log_dir)

    # trainer_id = int(os.getenv("PADDLE_TRAINER_ID", 0))
    # num_trainers = int(os.environ.get('PADDLE_TRAINERS_NUM', 1))
    global_step = 0
    all_step = cfg.DATASET.TRAIN_TOTAL_IMAGES // cfg.BATCH_SIZE
    if cfg.DATASET.TRAIN_TOTAL_IMAGES % cfg.BATCH_SIZE and not drop_last:
        all_step += 1
    all_step *= (cfg.SOLVER.NUM_EPOCHS - begin_epoch + 1)

    avg_loss = 0.0
    timer = Timer()
    timer.start()
    if begin_epoch > cfg.SOLVER.NUM_EPOCHS:
        raise ValueError((
            "begin epoch[{}] is larger than cfg.SOLVER.NUM_EPOCHS[{}]").format(
                begin_epoch, cfg.SOLVER.NUM_EPOCHS))

    if args.use_mpio:
        print_info("Use multiprocess reader")
    else:
        print_info("Use multi-thread reader")

    for epoch in range(begin_epoch, cfg.SOLVER.NUM_EPOCHS + 1):
        py_reader.start()
        while True:
            try:
                if args.debug:
                    # Print category IoU and accuracy to check whether the
                    # training process corresponds to expectations
                    loss, lr, pred, grts, masks = exe.run(
                        program=compiled_train_prog,
                        fetch_list=fetch_list,
                        return_numpy=True)
                    cm.calculate(pred, grts, masks)
                    avg_loss += np.mean(np.array(loss))
                    global_step += 1

                    if global_step % args.log_steps == 0:
                        speed = args.log_steps / timer.elapsed_time()
                        avg_loss /= args.log_steps
                        category_acc, mean_acc = cm.accuracy()
                        category_iou, mean_iou = cm.mean_iou()

                        print_info((
                            "epoch={} step={} lr={:.5f} loss={:.4f} acc={:.5f} mIoU={:.5f} step/sec={:.3f} | ETA {}"
                        ).format(epoch, global_step, lr[0], avg_loss, mean_acc,
                                 mean_iou, speed,
                                 calculate_eta(all_step - global_step, speed)))
                        print_info("Category IoU: ", category_iou)
                        print_info("Category Acc: ", category_acc)
                        if args.use_tb:
                            log_writer.add_scalar('Train/mean_iou', mean_iou,
                                                  global_step)
                            log_writer.add_scalar('Train/mean_acc', mean_acc,
                                                  global_step)
                            log_writer.add_scalar('Train/loss', avg_loss,
                                                  global_step)
                            log_writer.add_scalar('Train/lr', lr[0],
                                                  global_step)
                            log_writer.add_scalar('Train/step/sec', speed,
                                                  global_step)
                        sys.stdout.flush()
                        avg_loss = 0.0
                        cm.zero_matrix()
                        timer.restart()
                else:
                    # If not in debug mode, avoid unnecessary logging and calculation
                    loss, lr = exe.run(program=compiled_train_prog,
                                       fetch_list=fetch_list,
                                       return_numpy=True)
                    avg_loss += np.mean(np.array(loss))
                    global_step += 1

                    if global_step % args.log_steps == 0 and cfg.TRAINER_ID == 0:
                        avg_loss /= args.log_steps
                        speed = args.log_steps / timer.elapsed_time()
                        print((
                            "epoch={} step={} lr={:.5f} loss={:.4f} step/sec={:.3f} | ETA {}"
                        ).format(epoch, global_step, lr[0], avg_loss, speed,
                                 calculate_eta(all_step - global_step, speed)))
                        if args.use_tb:
                            log_writer.add_scalar('Train/loss', avg_loss,
                                                  global_step)
                            log_writer.add_scalar('Train/lr', lr[0],
                                                  global_step)
                            log_writer.add_scalar('Train/speed', speed,
                                                  global_step)
                        sys.stdout.flush()
                        avg_loss = 0.0
                        timer.restart()

            except fluid.core.EOFException:
                py_reader.reset()
                break
            except Exception as e:
                print(e)

        if epoch % cfg.TRAIN.SNAPSHOT_EPOCH == 0 and cfg.TRAINER_ID == 0:
            ckpt_dir = save_checkpoint(exe, train_prog, epoch)

            if args.do_eval:
                print("Evaluation start")
                _, mean_iou, _, mean_acc = evaluate(cfg=cfg,
                                                    ckpt_dir=ckpt_dir,
                                                    use_gpu=args.use_gpu,
                                                    use_mpio=args.use_mpio)
                if args.use_tb:
                    log_writer.add_scalar('Evaluate/mean_iou', mean_iou,
                                          global_step)
                    log_writer.add_scalar('Evaluate/mean_acc', mean_acc,
                                          global_step)

            # Use Tensorboard to visualize results
            if args.use_tb and cfg.DATASET.VIS_FILE_LIST is not None:
                visualize(cfg=cfg,
                          use_gpu=args.use_gpu,
                          vis_file_list=cfg.DATASET.VIS_FILE_LIST,
                          vis_dir="visual",
                          ckpt_dir=ckpt_dir,
                          log_writer=log_writer)

    # save final model
    if cfg.TRAINER_ID == 0:
        save_checkpoint(exe, train_prog, 'final')
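
The data_generator closure in train() above groups samples into per-trainer batches and, when sync batch norm is enabled, silently drops a trailing partial batch to avoid NCCL hangs. Stripped of the dataset and config plumbing, that batching logic reduces to a sketch like this:

def batch_samples(samples, batch_size, drop_last=True):
    # Accumulate samples into fixed-size batches; a trailing partial batch
    # is yielded only when drop_last is False (the non-sync-batch-norm case).
    batch = []
    for sample in samples:
        batch.append(sample)
        if len(batch) == batch_size:
            yield batch
            batch = []
    if batch and not drop_last:
        yield batch

print(list(batch_samples(range(7), 3)))         # [[0, 1, 2], [3, 4, 5]]
print(list(batch_samples(range(7), 3, False)))  # ... plus [6]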
def evaluate(cfg,
             ckpt_dir=None,
             use_gpu=False,
             vis=False,
             vis_dir='vis_out/test_public',
             use_mpio=False,
             **kwargs):
    np.set_printoptions(precision=5, suppress=True)

    startup_prog = fluid.Program()
    test_prog = fluid.Program()
    dataset = SegDataset(
        file_list=cfg.DATASET.VAL_FILE_LIST,
        mode=ModelPhase.EVAL,
        data_dir=cfg.DATASET.DATA_DIR)

    fls = []
    with open(cfg.DATASET.VAL_FILE_LIST) as fr:
        for line in fr.readlines():
            fls.append(line.strip().split(' ')[0])
    if vis:
        assert cfg.VIS.VISINEVAL is True
        if not os.path.exists(vis_dir):
            os.makedirs(vis_dir)

    def data_generator():
        # TODO: check whether the batch reader is compatible with Windows
        if use_mpio:
            data_gen = dataset.multiprocess_generator(
                num_processes=cfg.DATALOADER.NUM_WORKERS,
                max_queue_size=cfg.DATALOADER.BUF_SIZE)
        else:
            data_gen = dataset.generator()

        for b in data_gen:
            if cfg.DATASET.INPUT_IMAGE_NUM == 1:
                yield b[0], b[1], b[2]
            else:
                yield b[0], b[1], b[2], b[3]

    data_loader, avg_loss, pred, grts, masks = build_model(
        test_prog, startup_prog, phase=ModelPhase.EVAL)

    data_loader.set_sample_generator(
        data_generator, drop_last=False, batch_size=cfg.BATCH_SIZE)

    # Get device environment
    places = fluid.cuda_places() if use_gpu else fluid.cpu_places()
    place = places[0]
    dev_count = len(places)
    print("#Device count: {}".format(dev_count))

    exe = fluid.Executor(place)
    exe.run(startup_prog)
    test_prog = test_prog.clone(for_test=True)

    ckpt_dir = cfg.TEST.TEST_MODEL if not ckpt_dir else ckpt_dir

    if not os.path.exists(ckpt_dir):
        raise ValueError('The TEST.TEST_MODEL {} is not found'.format(ckpt_dir))

    if ckpt_dir is not None:
        print('load test model:', ckpt_dir)
        try:
            fluid.load(test_prog, os.path.join(ckpt_dir, 'model'), exe)
        except Exception:
            fluid.io.load_params(exe, ckpt_dir, main_program=test_prog)

    # Use streaming confusion matrix to calculate mean_iou
    np.set_printoptions(
        precision=4, suppress=True, linewidth=160, floatmode="fixed")
    class_num = cfg.DATASET.NUM_CLASSES
    conf_mat = ConfusionMatrix(class_num, streaming=True)
    fetch_list = [avg_loss.name, pred.name, grts.name, masks.name]
    num_images = 0
    step = 0
    all_step = cfg.DATASET.TEST_TOTAL_IMAGES // cfg.BATCH_SIZE + 1
    timer = Timer()
    timer.start()
    data_loader.start()
    cnt = 0
    while True:
        try:
            step += 1
            loss, pred, grts, masks = exe.run(
                test_prog, fetch_list=fetch_list, return_numpy=True)
            if vis:
                preds = np.array(pred, dtype=np.float32)
                for j in range(preds.shape[0]):
                    if cnt >= len(fls): continue
                    name = fls[cnt].split('/')[-1].split('.')[0]
                    p = np.squeeze(preds[j])
                    np.save(os.path.join(vis_dir, name + '.npy'), p)
                    cnt += 1
                print('vis %d npy... (%d tif sample)' % (cnt, cnt//36))
                continue

            loss = np.mean(np.array(loss))

            num_images += pred.shape[0]
            conf_mat.calculate(pred, grts, masks)
            _, iou = conf_mat.mean_iou()
            _, acc = conf_mat.accuracy()
            fwiou = conf_mat.frequency_weighted_iou()

            speed = 1.0 / timer.elapsed_time()

            print(
                "[EVAL]step={} loss={:.5f} acc={:.4f} IoU={:.4f} FWIoU={:.4f} step/sec={:.2f} | ETA {}"
                .format(step, loss, acc, iou, fwiou, speed, calculate_eta(all_step - step, speed)))
            timer.restart()
            sys.stdout.flush()
        except fluid.core.EOFException:
            break

    if vis:
        return

    category_iou, avg_iou = conf_mat.mean_iou()
    category_acc, avg_acc = conf_mat.accuracy()
    fwiou = conf_mat.frequency_weighted_iou()
    print("[EVAL]#image={} acc={:.4f} IoU={:.4f} FWIoU={:.4f}".format(
        num_images, avg_acc, avg_iou, fwiou))
    print("[EVAL]Category Acc:", category_acc)
    print("[EVAL]Category IoU:", category_iou)
    print("[EVAL]Kappa: {:.4f}".format(conf_mat.kappa()))

    return category_iou, avg_iou, category_acc, avg_acc
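
evaluate() leans on a streaming ConfusionMatrix for mean IoU, accuracy, frequency-weighted IoU and kappa. The real helper's API is not shown in these excerpts; a minimal NumPy sketch of the standard formulas behind those metrics, with cm[label, pred] accumulated batch by batch, might look like this:

import numpy as np

class StreamingConfusionMatrix:
    def __init__(self, num_classes):
        self.num_classes = num_classes
        self.cm = np.zeros((num_classes, num_classes), dtype=np.int64)

    def update(self, pred, label, mask=None):
        # Accumulate one batch; mask (if given) selects valid pixels.
        # Assumes all ids lie in [0, num_classes).
        pred = np.asarray(pred).ravel()
        label = np.asarray(label).ravel()
        if mask is not None:
            keep = np.asarray(mask).ravel().astype(bool)
            pred, label = pred[keep], label[keep]
        idx = label * self.num_classes + pred
        self.cm += np.bincount(idx, minlength=self.num_classes**2).reshape(
            self.num_classes, self.num_classes)

    def mean_iou(self):
        diag = np.diag(self.cm).astype(np.float64)
        union = self.cm.sum(axis=0) + self.cm.sum(axis=1) - diag
        iou = diag / np.maximum(union, 1)
        return iou, iou.mean()

    def accuracy(self):
        return np.diag(self.cm).sum() / max(self.cm.sum(), 1)

    def frequency_weighted_iou(self):
        freq = self.cm.sum(axis=1) / max(self.cm.sum(), 1)
        return (freq * self.mean_iou()[0]).sum()

    def kappa(self):
        total = max(self.cm.sum(), 1)
        po = np.diag(self.cm).sum() / total
        pe = (self.cm.sum(axis=0) * self.cm.sum(axis=1)).sum() / total**2
        return (po - pe) / max(1.0 - pe, 1e-8)

cm = StreamingConfusionMatrix(num_classes=3)
cm.update(pred=[0, 1, 2, 2], label=[0, 1, 1, 2])
print(cm.mean_iou()[1], cm.accuracy())  # ~0.667 0.75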
Example no. 24
def main():
    # Set logger to record information.
    logger = Logger(cfg)
    logger.log_info(cfg)
    metrics_logger = Metrics()
    utils.pack_code(cfg, logger=logger)

    # Build model.
    model = model_builder.build_model(cfg=cfg, logger=logger)

    # Read checkpoint.
    ckpt = torch.load(cfg.MODEL.PATH2CKPT) if cfg.GENERAL.RESUME else {}

    if cfg.GENERAL.RESUME:
        model.load_state_dict(ckpt["model"])
    resume_epoch = ckpt["epoch"] if cfg.GENERAL.RESUME else 0
    optimizer = (ckpt["optimizer"] if cfg.GENERAL.RESUME else
                 optimizer_helper.build_optimizer(cfg=cfg, model=model))
    # lr_scheduler = ckpt["lr_scheduler"] if cfg.GENERAL.RESUME else lr_scheduler_helper.build_scheduler(cfg=cfg, optimizer=optimizer)
    lr_scheduler = lr_scheduler_helper.build_scheduler(cfg=cfg,
                                                       optimizer=optimizer)
    lr_scheduler.sychronize(resume_epoch)
    loss_fn = (ckpt["loss_fn"] if cfg.GENERAL.RESUME else
               loss_fn_helper.build_loss_fn(cfg=cfg))

    # Set device.
    model, device = utils.set_device(model, cfg.GENERAL.GPU)

    # Prepare dataset.
    if cfg.GENERAL.TRAIN:
        try:
            train_data_loader = data_loader.build_data_loader(
                cfg, cfg.DATA.DATASET, "train")
        except:
            logger.log_info("Cannot build train dataset.")
    if cfg.GENERAL.VALID:
        try:
            valid_data_loader = data_loader.build_data_loader(
                cfg, cfg.DATA.DATASET, "valid")
        except:
            logger.log_info("Cannot build valid dataset.")
    if cfg.GENERAL.TEST:
        try:
            test_data_loader = data_loader.build_data_loader(
                cfg, cfg.DATA.DATASET, "test")
        except:
            logger.log_info("Cannot build test dataset.")

    # Train, evaluate model and save checkpoint.
    for epoch in range(cfg.TRAIN.MAX_EPOCH):
        if resume_epoch >= epoch:
            continue

        try:
            train_one_epoch(
                epoch=epoch,
                cfg=cfg,
                model=model,
                data_loader=train_data_loader,
                device=device,
                loss_fn=loss_fn,
                optimizer=optimizer,
                lr_scheduler=lr_scheduler,
                metrics_logger=metrics_logger,
                logger=logger,
            )
        except:
            logger.log_info("Failed to train model.")

        optimizer.zero_grad()
        with torch.no_grad():
            utils.save_ckpt(
                path2file=os.path.join(
                    cfg.MODEL.CKPT_DIR,
                    cfg.GENERAL.ID + "_" + str(epoch).zfill(3) + ".pth"),
                logger=logger,
                model=model.state_dict(),
                epoch=epoch,
                optimizer=optimizer,
                lr_scheduler=lr_scheduler,  # NOTE Need attribdict>=0.0.5
                loss_fn=loss_fn,
                metrics=metrics_logger,
            )
        try:
            evaluate(
                epoch=epoch,
                cfg=cfg,
                model=model,
                data_loader=valid_data_loader,
                device=device,
                loss_fn=loss_fn,
                metrics_logger=metrics_logger,
                phase="valid",
                logger=logger,
                save=cfg.SAVE.SAVE,
            )
        except:
            logger.log_info("Failed to evaluate model.")

        with torch.no_grad():
            utils.save_ckpt(
                path2file=os.path.join(
                    cfg.MODEL.CKPT_DIR,
                    cfg.GENERAL.ID + "_" + str(epoch).zfill(3) + ".pth"),
                logger=logger,
                model=model.state_dict(),
                epoch=epoch,
                optimizer=optimizer,
                lr_scheduler=lr_scheduler,  # NOTE Need attribdict>=0.0.5
                loss_fn=loss_fn,
                metrics=metrics_logger,
            )

    # If the test set has target images, evaluate and save the results;
    # otherwise just try to generate output images.
    if cfg.DATA.DATASET == "DualPixelNTIRE2021":
        try:
            generate(
                cfg=cfg,
                model=model,
                data_loader=valid_data_loader,
                device=device,
                phase="valid",
                logger=logger,
            )
        except:
            logger.log_info(
                "Failed to generate output images of valid set of NTIRE2021.")
    try:
        evaluate(
            epoch=epoch,
            cfg=cfg,
            model=model,
            data_loader=test_data_loader,
            device=device,
            loss_fn=loss_fn,
            metrics_logger=metrics_logger,
            phase="test",
            logger=logger,
            save=True,
        )
    except:
        logger.log_info("Failed to test model, try to generate images.")
        try:
            generate(
                cfg=cfg,
                model=model,
                data_loader=test_data_loader,
                device=device,
                phase="test",
                logger=logger,
            )
        except:
            logger.log_info("Cannot generate output images of test set.")
    return None
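
main() above resumes training from a single checkpoint dict holding the model weights, epoch, optimizer and loss function; note it stores the live optimizer and loss objects, whereas the more portable variant of the same pattern keeps state_dicts. A minimal sketch of that variant (function names here are illustrative, not the save_ckpt API used above):

import torch

def save_ckpt_sketch(path, model, optimizer, epoch):
    # Persisting state_dicts rather than live objects keeps the checkpoint
    # loadable even if class definitions change between runs.
    torch.save({
        "model": model.state_dict(),
        "optimizer": optimizer.state_dict(),
        "epoch": epoch,
    }, path)

def load_ckpt_sketch(path, model, optimizer):
    ckpt = torch.load(path, map_location="cpu")
    model.load_state_dict(ckpt["model"])
    optimizer.load_state_dict(ckpt["optimizer"])
    return ckpt["epoch"]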
Example no. 25
def test(cfg):
    """
    Perform multi-view testing/feature extraction on the pretrained video model.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging()

    # Print config.
    logger.info("Test with config:")
    logger.info(cfg)

    # Build the video model and print model statistics.
    model = model_builder.build_model(cfg)
    if du.is_master_proc():
        misc.log_model_info(model)

    # Load a checkpoint to test if applicable.
    if cfg.TEST.CHECKPOINT_FILE_PATH != "":
        cu.load_checkpoint(
            cfg.TEST.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            None,
            inflation=False,
            convert_from_caffe2=cfg.TEST.CHECKPOINT_TYPE == "caffe2",
        )
    elif cu.has_checkpoint(cfg.OUTPUT_DIR):
        last_checkpoint = cu.get_last_checkpoint(cfg.OUTPUT_DIR)
        cu.load_checkpoint(last_checkpoint, model, cfg.NUM_GPUS > 1)
    elif cfg.TRAIN.CHECKPOINT_FILE_PATH != "":
        # If no checkpoint is found in TEST.CHECKPOINT_FILE_PATH or in the
        # current checkpoint folder, try to load a checkpoint from
        # TRAIN.CHECKPOINT_FILE_PATH and test it.
        cu.load_checkpoint(
            cfg.TRAIN.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            None,
            inflation=False,
            convert_from_caffe2=cfg.TRAIN.CHECKPOINT_TYPE == "caffe2",
        )
    else:
        raise NotImplementedError("Unknown way to load checkpoint.")

    vid_root = cfg.DATA.PATH_TO_DATA_DIR
    videos_list_file = os.path.join(vid_root, "vid_list.csv")

    print("Loading Video List ...")
    with open(videos_list_file) as f:
        videos = sorted(
            [x.strip() for x in f.readlines() if len(x.strip()) > 0])
    print("Done")
    print("----------------------------------------------------------")

    print("{} videos to be processed...".format(len(videos)))
    print("----------------------------------------------------------")

    start_time = time.time()
    for vid in videos:
        # Create video testing loaders.
        path_to_vid = os.path.join(vid_root, os.path.split(vid)[0])
        vid_id = os.path.split(vid)[1]

        out_path = os.path.join(cfg.OUTPUT_DIR, os.path.split(vid)[0])
        out_file = vid_id.split(".")[0] + "_{}.npy".format(cfg.DATA.NUM_FRAMES)
        if os.path.exists(os.path.join(out_path, out_file)):
            print("{} already exists".format(out_file))
            continue

        print("Processing {}...".format(vid))

        dataset = VideoSet(cfg, path_to_vid, vid_id)
        test_loader = torch.utils.data.DataLoader(
            dataset,
            batch_size=cfg.TEST.BATCH_SIZE,
            shuffle=False,
            sampler=None,
            num_workers=cfg.DATA_LOADER.NUM_WORKERS,
            pin_memory=cfg.DATA_LOADER.PIN_MEMORY,
            drop_last=False,
        )

        # Perform multi-view test on the entire dataset.
        feat_arr = multi_view_test(test_loader, model, cfg)

        os.makedirs(out_path, exist_ok=True)
        np.save(os.path.join(out_path, out_file), feat_arr)
        print("Done.")
        print("----------------------------------------------------------")
    end_time = time.time()
    hours, minutes, seconds = calculate_time_taken(start_time, end_time)
    print("Time taken: {} hour(s), {} minute(s) and {} second(s)".format(
        hours, minutes, seconds))
    print("----------------------------------------------------------")
Example no. 26
def visualize(cfg,
              vis_file_list=None,
              use_gpu=False,
              vis_dir="visual",
              also_save_raw_results=False,
              ckpt_dir=None,
              log_writer=None,
              local_test=False,
              **kwargs):
    if vis_file_list is None:
        vis_file_list = cfg.DATASET.TEST_FILE_LIST
    dataset = SegDataset(file_list=vis_file_list,
                         mode=ModelPhase.VISUAL,
                         data_dir=cfg.DATASET.DATA_DIR)

    startup_prog = fluid.Program()
    test_prog = fluid.Program()
    pred, logit = build_model(test_prog, startup_prog, phase=ModelPhase.VISUAL)
    # Clone forward graph
    test_prog = test_prog.clone(for_test=True)

    # Generate a full colormap for a maximum of 256 classes
    color_map = get_color_map(256)

    # Get device environment
    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_prog)

    ckpt_dir = cfg.TEST.TEST_MODEL if not ckpt_dir else ckpt_dir

    fluid.io.load_params(exe, ckpt_dir, main_program=test_prog)

    save_dir = os.path.join(vis_dir, 'visual_results')
    makedirs(save_dir)
    if also_save_raw_results:
        raw_save_dir = os.path.join(vis_dir, 'raw_results')
        makedirs(raw_save_dir)

    fetch_list = [pred.name]
    test_reader = dataset.batch(dataset.generator, batch_size=1, is_test=True)
    img_cnt = 0
    for imgs, grts, img_names, valid_shapes, org_shapes in test_reader:
        pred_shape = (imgs.shape[2], imgs.shape[3])
        pred, = exe.run(program=test_prog,
                        feed={'image': imgs},
                        fetch_list=fetch_list,
                        return_numpy=True)

        num_imgs = pred.shape[0]
        # TODO: use multi-thread to write images
        for i in range(num_imgs):
            # Squeeze the prediction into an (H, W) label map, crop it to the
            # valid region and resize it back to the original image size
            res_map = np.squeeze(pred[i, :, :, :]).astype(np.uint8)
            img_name = img_names[i]
            grt = grts[i]
            res_shape = (res_map.shape[0], res_map.shape[1])
            if res_shape[0] != pred_shape[0] or res_shape[1] != pred_shape[1]:
                res_map = cv2.resize(res_map,
                                     pred_shape,
                                     interpolation=cv2.INTER_NEAREST)
            valid_shape = (valid_shapes[i, 0], valid_shapes[i, 1])
            res_map = res_map[0:valid_shape[0], 0:valid_shape[1]]
            org_shape = (org_shapes[i, 0], org_shapes[i, 1])
            res_map = cv2.resize(res_map, (org_shape[1], org_shape[0]),
                                 interpolation=cv2.INTER_NEAREST)

            if grt is not None:
                grt = grt[0:valid_shape[0], 0:valid_shape[1]]
                grt = cv2.resize(grt, (org_shape[1], org_shape[0]),
                                 interpolation=cv2.INTER_NEAREST)

            png_fn = to_png_fn(img_names[i])
            if also_save_raw_results:
                raw_fn = os.path.join(raw_save_dir, png_fn)
                dirname = os.path.dirname(raw_fn)
                makedirs(dirname)
                cv2.imwrite(raw_fn, res_map)

            # colorful segment result visualization
            vis_fn = os.path.join(save_dir, png_fn)
            dirname = os.path.dirname(vis_fn)
            makedirs(dirname)

            pred_mask = colorize(res_map, org_shapes[i], color_map)
            if grt is not None:
                grt = colorize(grt, org_shapes[i], color_map)
            cv2.imwrite(vis_fn, pred_mask)

            img_cnt += 1
            print("#{} visualize image path: {}".format(img_cnt, vis_fn))

            # Use Tensorboard to visualize image
            if log_writer is not None:
                # Calculate epoch from the ckpt_dir folder name
                epoch = int(os.path.split(ckpt_dir)[-1])
                print("Tensorboard visualization epoch", epoch)
                log_writer.add_image("Predict/{}".format(img_names[i]),
                                     pred_mask[..., ::-1],
                                     epoch,
                                     dataformats='HWC')
                # Original image
                # BGR->RGB
                img = cv2.imread(
                    os.path.join(cfg.DATASET.DATA_DIR,
                                 img_names[i]))[..., ::-1]
                log_writer.add_image("Images/{}".format(img_names[i]),
                                     img,
                                     epoch,
                                     dataformats='HWC')
                # Add ground truth (label) images
                if grt is not None:
                    log_writer.add_image("Label/{}".format(img_names[i]),
                                         grt[..., ::-1],
                                         epoch,
                                         dataformats='HWC')

        # If in local_test mode, only visualize 5 images, just to exercise the
        # procedure
        if local_test and img_cnt >= 5:
            break
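
visualize() turns each predicted class id into a colour via colorize() and a 256-entry colour map. Ignoring the resizing the real helper also performs, the core operation is a per-pixel table lookup; a minimal sketch assuming a (256, 3) colour table:

import numpy as np

def colorize_sketch(label_map, color_map):
    # label_map: (H, W) uint8 class ids; color_map: (256, 3) colour table.
    # NumPy fancy indexing applies the lookup per pixel -> (H, W, 3) image.
    lut = np.asarray(color_map, dtype=np.uint8)
    return lut[label_map]

labels = np.array([[0, 1], [1, 2]], dtype=np.uint8)
table = np.zeros((256, 3), dtype=np.uint8)
table[1] = (0, 255, 0)
table[2] = (0, 0, 255)
print(colorize_sketch(labels, table).shape)  # (2, 2, 3)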
Example no. 27
def visualize(cfg,
              vis_file_list=None,
              use_gpu=False,
              vis_dir="visual",
              ckpt_dir=None,
              log_writer=None,
              local_test=False,
              **kwargs):
    if vis_file_list is None:
        vis_file_list = cfg.DATASET.TEST_FILE_LIST
    dataset = SegDataset(file_list=vis_file_list,
                         mode=ModelPhase.VISUAL,
                         data_dir=cfg.DATASET.DATA_DIR)

    startup_prog = fluid.Program()
    test_prog = fluid.Program()
    pred, logit = build_model(test_prog, startup_prog, phase=ModelPhase.VISUAL)
    # Clone forward graph
    test_prog = test_prog.clone(for_test=True)

    # Generate a full colormap for at most 256 classes
    color_map = get_color_map_list(cfg.DATASET.NUM_CLASSES**2 if
                                   cfg.DATASET.NUM_CLASSES**2 < 256 else 256)

    # Get device environment
    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_prog)

    ckpt_dir = cfg.TEST.TEST_MODEL if not ckpt_dir else ckpt_dir

    if ckpt_dir is not None:
        print('load test model:', ckpt_dir)
        try:
            fluid.load(test_prog, os.path.join(ckpt_dir, 'model'), exe)
        except Exception:
            fluid.io.load_params(exe, ckpt_dir, main_program=test_prog)

    save_dir = vis_dir
    makedirs(save_dir)

    fetch_list = [pred.name]
    test_reader = dataset.batch(dataset.generator, batch_size=1, is_test=True)
    img_cnt = 0

    def exe_run():
        if cfg.DATASET.INPUT_IMAGE_NUM == 1:
            for imgs, grts, img_names, valid_shapes, org_shapes in test_reader:
                pred_shape = (imgs.shape[2], imgs.shape[3])
                pred, = exe.run(program=test_prog,
                                feed={'image1': imgs},
                                fetch_list=fetch_list,
                                return_numpy=True)
                yield pred, pred_shape, grts, img_names, valid_shapes, org_shapes
        else:
            for img1s, img2s, grts, img1_names, img2_names, valid_shapes, org_shapes in test_reader:
                pred_shape = (img1s.shape[2], img1s.shape[3])
                pred, = exe.run(program=test_prog,
                                feed={
                                    'image1': img1s,
                                    'image2': img2s
                                },
                                fetch_list=fetch_list,
                                return_numpy=True)
                yield pred, pred_shape, grts, img1_names, valid_shapes, org_shapes

    for pred, pred_shape, grts, img_names, valid_shapes, org_shapes in exe_run():
        idx = pred.shape[0]
        if cfg.DATASET.INPUT_IMAGE_NUM == 2 and cfg.VIS.SEG_FOR_CD:
            idx = pred.shape[0] // cfg.DATASET.INPUT_IMAGE_NUM
        pred1, pred2 = pred[:idx], pred[idx:]  # fluid.layers.split(pred, 2, dim=0)
        num_imgs = pred1.shape[0]
        # TODO: use multi-thread to write images
        for i in range(num_imgs):
            # Collect the (cropped and resized) prediction map(s) for this image
            res_map_list = []
            for pred in [pred1, pred2]:
                if pred.shape[0] == 0:
                    continue
                #res_map = np.squeeze(pred[i, :, :, :]).astype(np.uint8)
                res_map = np.squeeze(pred[i, :, :, :]).astype(np.float32)
                res_shape = (res_map.shape[0], res_map.shape[1])
                if res_shape[0] != pred_shape[0] or res_shape[1] != pred_shape[
                        1]:
                    res_map = cv2.resize(res_map,
                                         pred_shape,
                                         interpolation=cv2.INTER_NEAREST)
                valid_shape = (valid_shapes[i, 0], valid_shapes[i, 1])
                res_map = res_map[0:valid_shape[0], 0:valid_shape[1]]
                org_shape = (org_shapes[i, 0], org_shapes[i, 1])
                res_map = cv2.resize(res_map, (org_shape[1], org_shape[0]),
                                     interpolation=cv2.INTER_NEAREST)
                res_map_list.append(res_map)

            img_name = img_names[i]
            png_fn = to_png_fn(img_name)

            # colorful segment result visualization
            vis_fn = os.path.join(save_dir, png_fn)
            dirname = os.path.dirname(vis_fn)
            makedirs(dirname)

            if cfg.DATASET.INPUT_IMAGE_NUM == 1 or \
                    (cfg.DATASET.INPUT_IMAGE_NUM == 2 and not cfg.VIS.SEG_FOR_CD):
                res_map = res_map_list[0]
                if cfg.VIS.RAW_PRED:
                    #pred_mask = PILImage.fromarray(res_map.astype(np.uint8), mode='L')
                    #pred_mask.save(vis_fn)
                    np.save(vis_fn.replace(".png", ".npy"),
                            res_map.astype(np.float32))
                else:
                    if cfg.VIS.ADD_LABEL:
                        grt_im = cv2.resize(grts[i],
                                            pred_shape,
                                            interpolation=cv2.INTER_NEAREST)
                        res_map = np.hstack((res_map, grt_im))
                    pred_mask = PILImage.fromarray(res_map.astype(np.uint8),
                                                   mode='P')
                    pred_mask.putpalette(color_map)
                    pred_mask.save(vis_fn)
            else:
                res_map1, res_map2 = res_map_list
                diff = res_map1 * cfg.DATASET.NUM_CLASSES + res_map2
                unchange_idx = np.where((res_map1 - res_map2) == 0)
                diff[unchange_idx] = 0
                res_map = np.hstack((res_map1, res_map2, diff))
                pred_mask = PILImage.fromarray(res_map.astype(np.uint8),
                                               mode='P')
                pred_mask.putpalette(color_map)
                pred_mask.save(vis_fn)

            img_cnt += 1
            print("#{} visualize image path: {}".format(img_cnt, vis_fn))

            # Use VisualDL to visualize image
            if log_writer is not None:
                # Calculate epoch from the ckpt_dir folder name
                epoch = int(os.path.split(ckpt_dir)[-1])
                print("VisualDL visualization epoch", epoch)

                pred_mask_np = np.array(pred_mask.convert("RGB"))
                log_writer.add_image("Predict/{}".format(img_name),
                                     pred_mask_np, epoch)
                # Original image
                # BGR->RGB
                img = cv2.imread(os.path.join(cfg.DATASET.DATA_DIR,
                                              img_name))[..., ::-1]
                log_writer.add_image("Images/{}".format(img_name), img, epoch)
                # add ground truth (label) images
                grt = grts[i]
                if grt is not None:
                    grt = grt[0:valid_shape[0], 0:valid_shape[1]]
                    grt_pil = PILImage.fromarray(grt.astype(np.uint8),
                                                 mode='P')
                    grt_pil.putpalette(color_map)
                    grt_pil = grt_pil.resize((org_shape[1], org_shape[0]))
                    grt = np.array(grt_pil.convert("RGB"))
                    log_writer.add_image("Label/{}".format(img_name), grt,
                                         epoch)

        # If in local_test mode, only visualize 5 images, just to exercise the
        # procedure
        if local_test and img_cnt >= 5:
            break
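
The change-detection branch above encodes a pair of segmentation maps into a single map via res_map1 * cfg.DATASET.NUM_CLASSES + res_map2, then zeroes the pixels where the class did not change, so each class transition gets a unique id and stable areas stay at 0. The arithmetic in isolation:

import numpy as np

def change_map(seg1, seg2, num_classes):
    # Pair (c1, c2) -> c1 * num_classes + c2 assigns each class transition
    # a unique id; pixels whose class is unchanged are reset to 0.
    diff = seg1.astype(np.int64) * num_classes + seg2.astype(np.int64)
    diff[seg1 == seg2] = 0
    return diff

a = np.array([[0, 1], [2, 2]])
b = np.array([[0, 2], [2, 0]])
print(change_map(a, b, num_classes=3))
# [[0 5]
#  [0 6]]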