Code example #1
File: test_net.py  Project: ygest/SlowFast
def test(cfg):
    """
    Perform multi-view testing on the pretrained video model.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Set up environment.
    du.init_distributed_training(cfg)
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging(cfg.OUTPUT_DIR)

    # Print config.
    logger.info("Test with config:")
    logger.info(cfg)

    # Build the video model and print model statistics.
    model = build_model(cfg)
    if du.is_master_proc() and cfg.LOG_MODEL_INFO:
        misc.log_model_info(model, cfg, use_train_input=False)

    cu.load_test_checkpoint(cfg, model)

    # Create video testing loaders.
    test_loader = loader.construct_loader(cfg, "test")
    logger.info("Testing model for {} iterations".format(len(test_loader)))

    if cfg.DETECTION.ENABLE:
        assert cfg.NUM_GPUS == cfg.TEST.BATCH_SIZE or cfg.NUM_GPUS == 0
        test_meter = AVAMeter(len(test_loader), cfg, mode="test")
    else:
        assert (
            test_loader.dataset.num_videos %
            (cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS) == 0)
        # Create meters for multi-view testing.
        test_meter = TestMeter(
            test_loader.dataset.num_videos //
            (cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS),
            cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS,
            cfg.MODEL.NUM_CLASSES,
            len(test_loader),
            cfg.DATA.MULTI_LABEL,
            cfg.DATA.ENSEMBLE_METHOD,
        )

    # Set up writer for logging to Tensorboard format.
    if cfg.TENSORBOARD.ENABLE and du.is_master_proc(
            cfg.NUM_GPUS * cfg.NUM_SHARDS):
        writer = tb.TensorboardWriter(cfg)
    else:
        writer = None

    # Perform multi-view test on the entire dataset.
    test_meter = perform_test(test_loader, model, test_meter, cfg, writer)
    if writer is not None:
        writer.close()
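For context, the sketch below shows how a `test(cfg)` function like the one above is typically launched. It is a minimal, hypothetical entry point modeled on SlowFast's `tools/run_net.py`; the `parse_args`, `load_config`, and `launch_job` helpers are assumed to behave as in the stock repository and may differ between forks.

# Hypothetical entry point, modeled on SlowFast's tools/run_net.py.
from slowfast.utils.misc import launch_job
from slowfast.utils.parser import load_config, parse_args

from test_net import test  # the test(cfg) function shown above


def main():
    args = parse_args()      # parse command-line arguments and config overrides
    cfg = load_config(args)  # build the CfgNode from the YAML file plus overrides
    if cfg.TEST.ENABLE:
        # launch_job spawns one process per GPU and runs test(cfg) in each.
        launch_job(cfg=cfg, init_method=args.init_method, func=test)


if __name__ == "__main__":
    main()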
Code example #2
def video_extract(cfg):
    ctx = multiprocessing.get_context("spawn")
    # Setup logging format.
    logging.setup_logging(cfg.OUTPUT_DIR)
    logger.info("Extract with config:")
    logger.info(cfg)

    # initialize model
    name = cfg.MODEL.MODEL_NAME
    model = MODEL_REGISTRY.get(name)(cfg)
    if torch.cuda.is_available():
        model = torch.nn.DataParallel(model).cuda()
    model.eval()

    # initialize data loader
    dataloader = Extractor(cfg)
    logger.info("Testing model for {} videos".format(len(dataloader)))

    if cfg.TEST.CHECKPOINT_FILE_PATH != "":
        cu.load_checkpoint(
            path_to_checkpoint=cfg.TEST.CHECKPOINT_FILE_PATH,
            model=model,
            data_parallel=True,
            optimizer=None,
            inflation=False,
            convert_from_caffe2=cfg.TEST.CHECKPOINT_TYPE == "caffe2",
        )
    else:
        logger.info("Testing with random initialization. Only for debugging.")

    index_queue = ctx.Queue()
    result_queue = ctx.Queue()
    workers = [
        ctx.Process(target=get_video,
                    args=(dataloader, index_queue, result_queue))
        for i in range(cfg.TEST.WORKERS)
    ]

    for w in workers:
        w.daemon = True
        w.start()

    num_video = len(dataloader)

    for i in range(num_video):
        index_queue.put(i)

    # NUM_FRAMES must be divisible by ALPHA.
    num_frames = cfg.DATA.NUM_FRAMES
    step_frames = num_frames
    fout = open(cfg.TEST.OUTPUT_FEATURE_FILE, "w")
    start_time = time.time()
    for i in range(num_video):
        video_data = result_queue.get()
        run(cfg, model, video_data, num_frames, step_frames, fout)
        period = time.time() - start_time
        logger.info(
            "video index: %d, period: %.2f sec, speed: %.2f sec/video." %
            (i, period, period / (i + 1)))
    fout.close()
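The example above relies on a `get_video` worker whose implementation is not shown. The following is only a hypothetical sketch of such a worker loop, under the assumption that indexing the Extractor dataset decodes one video.

def get_video(dataloader, index_queue, result_queue):
    # Hypothetical worker loop: pull a video index from index_queue, let the
    # Extractor dataset decode that video, and hand the result back to the
    # main process through result_queue. The workers run as daemons, so an
    # endless loop is acceptable here.
    while True:
        index = index_queue.get()
        result_queue.put(dataloader[index])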
Code example #3
def train_des(cfg):
    """
    Train a video model for many epochs on train set and evaluate it on val set.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging(cfg.OUTPUT_DIR)
    # Print config.
    logger.info("Train with config:")
    logger.info(pprint.pformat(cfg))

    # Build the video model and print model statistics.
    model = build_clevrer_model(cfg)

    # Construct the optimizer.
    optimizer = AdamW(model.parameters(), lr=cfg.SOLVER.BASE_LR, eps=1e-8)
    start_epoch = cu.load_train_checkpoint(cfg, model, optimizer)
    # Create the video train and val loaders.
    train_loader = build_dataloader(cfg, "train")
    val_loader = build_dataloader(cfg, "val")

    total_steps = len(train_loader) * cfg.SOLVER.MAX_EPOCH

    # Create the learning rate scheduler.
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=0,  # Default value in run_glue.py
        num_training_steps=total_steps)

    # Create meters.
    train_meter = ClevrerTrainMeter(len(train_loader), cfg)
    val_meter = ClevrerValMeter(len(val_loader), cfg)

    # Perform the training loop.
    logger.info("Start epoch: {}".format(start_epoch + 1))
    for cur_epoch in range(start_epoch, cfg.SOLVER.MAX_EPOCH):
        # Train for one epoch.
        train_epoch(train_loader, model, optimizer, scheduler, train_meter,
                    cur_epoch, cfg)

        is_checkp_epoch = cu.is_checkpoint_epoch(
            cfg,
            cur_epoch,
            None,
        )
        is_eval_epoch = misc.is_eval_epoch(cfg, cur_epoch, None)

        # Save a checkpoint.
        if is_checkp_epoch:
            cu.save_checkpoint(cfg.OUTPUT_DIR, model, optimizer, cur_epoch,
                               cfg)
        # Evaluate the model on validation set.
        if is_eval_epoch:
            eval_epoch(val_loader, model, val_meter, cur_epoch, cfg)
Code example #4
File: extract.py  Project: BigFishMaster/SlowFast
def extract(cfg):
    # Setup logging format.
    logging.setup_logging(cfg.OUTPUT_DIR)
    logger.info("Extract with config:")
    logger.info(cfg)

    # initialize model
    name = cfg.MODEL.MODEL_NAME
    model = MODEL_REGISTRY.get(name)(cfg)
    if torch.cuda.is_available():
        model = torch.nn.DataParallel(model).cuda()

    # initialize data loader
    dataloader = Extractor(cfg)
    logger.info("Testing model for {} videos".format(len(dataloader)))

    if cfg.TEST.CHECKPOINT_FILE_PATH != "":
        cu.load_checkpoint(
            path_to_checkpoint=cfg.TEST.CHECKPOINT_FILE_PATH,
            model=model,
            data_parallel=True,
            optimizer=None,
            inflation=False,
            convert_from_caffe2=cfg.TEST.CHECKPOINT_TYPE == "caffe2",
        )
    else:
        logger.info("Testing with random initialization. Only for debugging.")

    run(dataloader, model, cfg)
Code example #5
File: demo_net.py  Project: wxwoods/SlowFast
def run_demo(cfg, frame_provider):
    """
    Run demo visualization.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
        frame_provider (iterator): Python iterator that returns task objects that are filled
            with necessary information such as `frames`, `id` and `num_buffer_frames` for the
            prediction and visualization pipeline.
    """
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)
    # Setup logging format.
    logging.setup_logging(cfg.OUTPUT_DIR)
    # Print config.
    logger.info("Run demo with config:")
    logger.info(cfg)
    assert cfg.NUM_GPUS <= 1, "Cannot run demo on multiple GPUs."
    video_vis = VideoVisualizer(
        cfg.MODEL.NUM_CLASSES,
        cfg.DEMO.LABEL_FILE_PATH,
        cfg.TENSORBOARD.MODEL_VIS.TOPK_PREDS,
        cfg.TENSORBOARD.MODEL_VIS.COLORMAP,
    )

    if cfg.DETECTION.ENABLE:
        object_detector = Detectron2Predictor(cfg)

    model = ActionPredictor(cfg)

    seq_len = cfg.DATA.NUM_FRAMES * cfg.DATA.SAMPLING_RATE
    assert (cfg.DEMO.BUFFER_SIZE <= seq_len //
            2), "Buffer size cannot be greater than half of sequence length."
    init_task_info(
        frame_provider.display_height,
        frame_provider.display_width,
        cfg.DATA.TEST_CROP_SIZE,
        cfg.DEMO.CLIP_VIS_SIZE,
    )
    for able_to_read, task in frame_provider:
        if not able_to_read:
            break

        if cfg.DETECTION.ENABLE:
            task = object_detector(task)

        task = model(task)
        frames = draw_predictions(task, video_vis)
        # hit Esc to quit the demo.
        key = cv2.waitKey(1)
        if key == 27:
            break
        yield frames
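Because `run_demo` is a generator that yields visualized frames, a caller has to drive it. The loop below is a hypothetical consumer, assuming the yielded frames are OpenCV-compatible arrays, as the `cv2.waitKey` call above suggests.

import cv2

# Hypothetical driver for the run_demo generator above.
for frames in run_demo(cfg, frame_provider):
    # `frames` holds the visualized frames produced for one task; display
    # them one by one in an OpenCV window.
    for frame in frames:
        cv2.imshow("SlowFast demo", frame)
        cv2.waitKey(1)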
Code example #6
def run_demo(cfg, progress_callback=None):
    """
    :param cfg:
    :return:
    """
    # Set up environment.
    setup_environment()
    # Setup logging format
    logging.setup_logging(cfg.OUTPUT_DIR)
    logger.info("=== Demo started ===")
    multi_process_demo = MultiProcessDemo(cfg, progress_callback)
    multi_process_demo.run_demo()
    logger.info("=== Demo finished ===")
Code example #7
def visualize(cfg):
    """
    Perform layer weights and activations visualization on the model.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    if cfg.TENSORBOARD.ENABLE and cfg.TENSORBOARD.MODEL_VIS.ENABLE:
        # Set up environment.
        du.init_distributed_training(cfg)
        # Set random seed from configs.
        np.random.seed(cfg.RNG_SEED)
        torch.manual_seed(cfg.RNG_SEED)

        # Setup logging format.
        logging.setup_logging(cfg.OUTPUT_DIR)

        # Print config.
        logger.info("Model Visualization with config:")
        logger.info(cfg)

        # Build the video model and print model statistics.
        model = build_model(cfg)
        if du.is_master_proc() and cfg.LOG_MODEL_INFO:
            misc.log_model_info(model, cfg, is_train=False)

        cu.load_test_checkpoint(cfg, model)

        # Create video testing loaders.
        vis_loader = loader.construct_loader(cfg, "test")
        logger.info(
            "Visualize model for {} data points".format(len(vis_loader))
        )

        if cfg.DETECTION.ENABLE:
            assert cfg.NUM_GPUS == cfg.TEST.BATCH_SIZE

        # Set up writer for logging to Tensorboard format.
        if du.is_master_proc(cfg.NUM_GPUS * cfg.NUM_SHARDS):
            writer = tb.TensorboardWriter(cfg)
        else:
            writer = None

        # Run visualization on the model
        run_visualization(vis_loader, model, cfg, writer)

        if writer is not None:
            writer.close()
Code example #8
def init_model(cfg):
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging()

    # Print config.
    logger.info("Run demo with config:")
    logger.info(cfg)

    model = build_and_switch_demo_model(cfg)
    load_checkpoint(cfg, model)

    return model
Code example #9
File: test_net.py  Project: gabrielsluz/SlowFast
def test(cfg):
    """
    Perform multi-view testing on the pretrained video model.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Set up environment.
    du.init_distributed_training(cfg)
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging(cfg.OUTPUT_DIR)

    # Print config.
    logger.info("Test with config:")
    logger.info(cfg)

    # Build the video model and print model statistics.
    model = build_model(cfg)

    cu.load_test_checkpoint(cfg, model)

    # Create video testing loaders.
    test_loader = loader.construct_loader(cfg, "test")
    logger.info("Testing model for {} iterations".format(len(test_loader)))

    # Create meters for loss tracking
    test_meter = TrainMeter(test_loader.dataset.num_videos, cfg)

    # Set up writer for logging to Tensorboard format.
    if cfg.TENSORBOARD.ENABLE and du.is_master_proc(
        cfg.NUM_GPUS * cfg.NUM_SHARDS
    ):
        writer = tb.TensorboardWriter(cfg)
    else:
        writer = None

    # Perform multi-view test on the entire dataset.
    test_meter = perform_test(test_loader, model, test_meter, cfg, writer)
    if writer is not None:
        writer.close()
Code example #10
def experiment(cfg):
    # Setup logging format.
    logging.setup_logging()

    # Print config.
    logger.info("Infer with config:")
    logger.info(cfg)

    # Build the SlowFast model and print its statistics
    model = build_model(cfg)
    if du.is_master_proc():
        misc.log_model_info(model, cfg, is_train=False)

    # load weights
    if cfg.INFERENCE.WEIGHTS_FILE_PATH != "":
        cu.load_checkpoint(cfg.INFERENCE.WEIGHTS_FILE_PATH, model, cfg.NUM_GPUS > 1, None,
                           inflation=False, convert_from_caffe2=cfg.INFERENCE.WEIGHTS_TYPE == "caffe2")
    else:
        raise FileNotFoundError("Model weights file could not be found")

    perform_inference(model, cfg)
Code example #11
def video_extract(cfg):
    ctx = multiprocessing.get_context("spawn")
    # Setup logging format.
    logging.setup_logging(cfg.OUTPUT_DIR)
    logger.info("Extract with config:")
    logger.info(cfg)

    # initialize data loader
    dataloader = Extractor(cfg)
    logger.info("Testing model for {} videos".format(len(dataloader)))

    index_queue = ctx.Queue()
    video_queue = ctx.Queue()
    result_queue = ctx.Queue()
    video_workers = [ctx.Process(target=get_video, args=(dataloader, index_queue, video_queue))
                     for i in range(cfg.TEST.WORKERS)]
    for w in video_workers:
        w.daemon = True
        w.start()

    result_workers = [ctx.Process(target=get_result, args=(cfg, gpu_id, video_queue, result_queue))
                      for gpu_id in range(cfg.NUM_GPUS)]
    for w in result_workers:
        w.daemon = True
        w.start()

    num_video = len(dataloader)
    for i in range(num_video):
        index_queue.put(i)

    # NUM_FRAMES must be divisible by ALPHA.
    start_time = time.time()
    fout = open(cfg.TEST.OUTPUT_FEATURE_FILE, "w")
    for i in range(num_video):
        result = result_queue.get()
        fout.write(result + "\n")
        period = time.time() - start_time
        logger.info("video index: %d, period: %.2f sec, speed: %.2f sec/video."
                    %(i, period, period/(i+1)))
    fout.close()
Code example #12
  DATA.PATH_TO_DATA_DIR /datasets/clevrer \
  DATA.PATH_PREFIX /datasets/clevrer \
  MONET.CHECKPOINT_LOAD ./monet_checkpoints/checkpoint_epoch_00140.pyth
"""


#https://discuss.pytorch.org/t/how-do-i-check-the-number-of-parameters-of-a-model/4325
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)


args = parse_args()
cfg = load_config(args)

logger = logging.get_logger(__name__)
logging.setup_logging(cfg.OUTPUT_DIR)

dataset = Clevrer(cfg, 'train')
print("Dataset len = {}".format(len(dataset)))

#Test DataLoader
dataloader = DataLoader(dataset,
                        batch_size=cfg.TRAIN.BATCH_SIZE,
                        shuffle=True,
                        num_workers=0)

vocab_len = dataset.get_vocab_len()
ans_vocab_len = dataset.get_ans_vocab_len()

model = ClevrerMain(cfg, vocab_len, ans_vocab_len)
if cfg.NUM_GPUS:
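    # The original snippet is truncated at this point. A plausible
    # continuation (an assumption, not taken from the source) would move the
    # model to the GPU when cfg.NUM_GPUS is non-zero:
    model = model.cuda()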
Code example #13
def train(cfg):
    """
    Train a video model for many epochs on train set and evaluate it on val set.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging()

    # Print config.
    logger.info("Train with config:")
    logger.info(pprint.pformat(cfg))

    # Build the video model and print model statistics.
    model = model_builder.build_model(cfg)
    if du.is_master_proc():
        misc.log_model_info(model)

    # Construct the optimizer.
    optimizer = optim.construct_optimizer(model, cfg)

    # Load a checkpoint to resume training if applicable.
    if cfg.TRAIN.AUTO_RESUME and cu.has_checkpoint(cfg.OUTPUT_DIR):
        logger.info("Load from last checkpoint.")
        last_checkpoint = cu.get_last_checkpoint(cfg.OUTPUT_DIR)
        checkpoint_epoch = cu.load_checkpoint(last_checkpoint, model,
                                              cfg.NUM_GPUS > 1, optimizer)
        start_epoch = checkpoint_epoch + 1
    elif cfg.TRAIN.CHECKPOINT_FILE_PATH != "":
        logger.info("Load from given checkpoint file.")
        checkpoint_epoch = cu.load_checkpoint(
            cfg.TRAIN.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            optimizer,
            inflation=cfg.TRAIN.CHECKPOINT_INFLATE,
            convert_from_caffe2=cfg.TRAIN.CHECKPOINT_TYPE == "caffe2",
        )
        start_epoch = checkpoint_epoch + 1
    else:
        start_epoch = 0

    # Create the video train and val loaders.
    train_loader = loader.construct_loader(cfg, "train")
    val_loader = loader.construct_loader(cfg, "val")

    # Create meters.
    if cfg.DETECTION.ENABLE:
        train_meter = AVAMeter(len(train_loader), cfg, mode="train")
        val_meter = AVAMeter(len(val_loader), cfg, mode="val")
    else:
        train_meter = TrainMeter(len(train_loader), cfg)
        val_meter = ValMeter(len(val_loader), cfg)

    # Perform the training loop.
    logger.info("Start epoch: {}".format(start_epoch + 1))

    for cur_epoch in range(start_epoch, cfg.SOLVER.MAX_EPOCH):
        # Shuffle the dataset.
        loader.shuffle_dataset(train_loader, cur_epoch)
        # Train for one epoch.
        train_epoch(train_loader, model, optimizer, train_meter, cur_epoch,
                    cfg)

        # Compute precise BN stats.
        if cfg.BN.USE_PRECISE_STATS and len(get_bn_modules(model)) > 0:
            calculate_and_update_precise_bn(train_loader, model,
                                            cfg.BN.NUM_BATCHES_PRECISE)

        # Save a checkpoint.
        if cu.is_checkpoint_epoch(cur_epoch, cfg.TRAIN.CHECKPOINT_PERIOD):
            cu.save_checkpoint(cfg.OUTPUT_DIR, model, optimizer, cur_epoch,
                               cfg)
        # Evaluate the model on validation set.
        if misc.is_eval_epoch(cfg, cur_epoch):
            eval_epoch(val_loader, model, val_meter, cur_epoch, cfg)
Code example #14
def visualize_activations(cfg):
    """
    Load a trained video model and save visualizations of its inputs and intermediate activations for a few training examples.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """

    # Setup logging format.
    logging.setup_logging(cfg)

    # Print config.
    logger.info("Vizualize activations")
    # logger.info(pprint.pformat(cfg))

    # Build the video model and print model statistics.
    model = build_model(cfg)
    # Construct the optimizer.
    # optimizer = optim.construct_optimizer(model, cfg)

    logger.info("Load from given checkpoint file.")
    checkpoint_epoch = cu.load_checkpoint(
        cfg.TRAIN.CHECKPOINT_FILE_PATH,
        model,
        cfg.NUM_GPUS > 1,
        optimizer=None,
        inflation=cfg.TRAIN.CHECKPOINT_INFLATE,
        convert_from_caffe2=cfg.TRAIN.CHECKPOINT_TYPE == "caffe2",
    )

    # if du.is_master_proc():
    #     misc.log_model_info(model, cfg, is_train=True)

    # Create the video train and val loaders.
    # train_loader = loader.construct_loader(cfg, "train")
    # val_loader = loader.construct_loader(cfg, "val")

    train_set = build_dataset(cfg.TEST.DATASET, cfg, "train")

    for i in np.random.choice(len(train_set), 5):

        # frames, label, _, _ = train_set.get_augmented_examples(i)
        frames, label, _, _ = train_set[i]
        inputs = frames
        inputs[0] = inputs[0][None, :]
        logger.info(frames[0].shape)
        # frames = frames[0].permute(0,2,3,4,1)
        frames = frames[0].squeeze().transpose(0, 1)  #.permute(1,2,3,0)
        logger.info(frames.shape)
        tv.utils.save_image(frames,
                            os.path.join(cfg.OUTPUT_DIR, 'example_%d.jpg' % i),
                            nrow=18,
                            normalize=True)

        for j in range(len(inputs)):
            inputs[j] = inputs[j].cuda(non_blocking=True)
        with torch.no_grad():
            # logger.info(inputs[i].shape)
            # sys.stdout.flush()
            inputs[0] = inputs[0][:min(3, len(inputs[0]))]
            output = model(inputs, extra=['frames'])

            # frames = frames[0].transpose(0,1)#.permute(1,2,3,0)
            # tv.utils.save_image(frames, os.path.join(cfg.OUTPUT_DIR, 'example_target_%d.jpg'%i), nrow=18, normalize=True)

            input_aug = output['input_aug']
            logger.info(input_aug.shape)

            input_aug = input_aug[0].transpose(0, 1)
            tv.utils.save_image(input_aug,
                                os.path.join(cfg.OUTPUT_DIR,
                                             'example_input_%d.jpg' % i),
                                nrow=18,
                                normalize=True)

            # mix_layer [1, timesteps, layers, activations]
            mix_out = output['mix_layer']  #.cpu().data.numpy().squeeze()
            for layer in range(len(mix_out)):
                logger.info('mix layer %d' % layer)
                logger.info(mix_out[layer].view([18, -1]).mean(1))
                images = mix_out[layer].transpose(1, 2).transpose(0, 1)
                logger.info(images.shape)
                images = images.reshape((-1, ) + images.shape[2:])
                images = (images - images.min())
                images = images / images.max()
                tv.utils.save_image(
                    images,
                    os.path.join(cfg.OUTPUT_DIR,
                                 'example_%d_mix_layer_l%d.jpg' % (i, layer)),
                    nrow=18,
                    normalize=True)

            # BU errors per timestep per layer (choose a random activation or the mean) also write out the mean/norm
            # [1, timesteps, layers, channels, height, width]

            bu_errors = output['bu_errors']  #.cpu()#.data.numpy().squeeze()

            for layer in range(len(bu_errors)):
                images = bu_errors[layer].transpose(1, 2).transpose(0, 1)
                images = (images - images.min())
                images = images / images.max()
                logger.info(images.shape)
                images = images.reshape((-1, ) + images.shape[2:])
                tv.utils.save_image(
                    images,
                    os.path.join(cfg.OUTPUT_DIR,
                                 'example_%d_bu_errors_l%d.jpg' % (i, layer)),
                    nrow=18,
                    normalize=True)

            # horiz inhibition per timestep per layer (choose a random activation or the mean) also write out the mean/norm
            # [1, timesteps, layers, channels, height, width]
            inhibition = output['H_inh']  #.cpu()#.data.numpy().squeeze()
            for layer in range(len(inhibition)):
                images = inhibition[layer].transpose(1, 2).transpose(0, 1)
                images = (images - images.min())
                images = images / images.max()
                logger.info(images.shape)
                images = images.reshape((-1, ) + images.shape[2:])

                tv.utils.save_image(
                    images,
                    os.path.join(cfg.OUTPUT_DIR,
                                 'example_%d_H_inh_l%d.jpg' % (i, layer)),
                    nrow=18,
                    normalize=True)

            # persistent state in between timesteps
            # [1, timesteps, layers, channels, height, width]
            hidden = output['hidden']  #.cpu()#.data.numpy().squeeze()
            for layer in range(len(hidden)):
                images = hidden[layer].transpose(1, 2).transpose(0, 1)
                images = (images - images.min())
                images = images / images.max()
                logger.info(images.shape)
                images = images.reshape((-1, ) + images.shape[2:])

                tv.utils.save_image(
                    images,
                    os.path.join(cfg.OUTPUT_DIR,
                                 'example_%d_hidden_l%d.jpg' % (i, layer)),
                    nrow=18,
                    normalize=True)
Code example #15
def benchmark_data_loading(cfg):
    """
    Benchmark the speed of data loading in PySlowFast.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Set up environment.
    setup_environment()
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging()

    # Print config.
    logger.info("Benchmark data loading with config:")
    logger.info(pprint.pformat(cfg))

    timer = Timer()
    dataloader = loader.construct_loader(cfg, "train")
    logger.info("Initialize loader using {:.2f} seconds.".format(
        timer.seconds()))
    # Total batch size across different machines.
    batch_size = cfg.TRAIN.BATCH_SIZE * cfg.NUM_SHARDS
    log_period = cfg.BENCHMARK.LOG_PERIOD
    epoch_times = []
    # Test for a few epochs.
    for cur_epoch in range(cfg.BENCHMARK.NUM_EPOCHS):
        timer = Timer()
        timer_epoch = Timer()
        iter_times = []
        for cur_iter, _ in enumerate(tqdm.tqdm(dataloader)):
            if cur_iter > 0 and cur_iter % log_period == 0:
                iter_times.append(timer.seconds())
                ram_usage, ram_total = misc.cpu_mem_usage()
                logger.info(
                    "Epoch {}: {} iters ({} videos) in {:.2f} seconds. "
                    "RAM Usage: {:.2f}/{:.2f} GB.".format(
                        cur_epoch,
                        log_period,
                        log_period * batch_size,
                        iter_times[-1],
                        ram_usage,
                        ram_total,
                    ))
                timer.reset()
        epoch_times.append(timer_epoch.seconds())
        ram_usage, ram_total = misc.cpu_mem_usage()
        logger.info(
            "Epoch {}: in total {} iters ({} videos) in {:.2f} seconds. "
            "RAM Usage: {:.2f}/{:.2f} GB.".format(
                cur_epoch,
                len(dataloader),
                len(dataloader) * batch_size,
                epoch_times[-1],
                ram_usage,
                ram_total,
            ))
        logger.info(
            "Epoch {}: on average every {} iters ({} videos) take {:.2f}/{:.2f} "
            "(avg/std) seconds.".format(
                cur_epoch,
                log_period,
                log_period * batch_size,
                np.mean(iter_times),
                np.std(iter_times),
            ))
    logger.info("On average every epoch ({} videos) takes {:.2f}/{:.2f} "
                "(avg/std) seconds.".format(
                    len(dataloader) * batch_size,
                    np.mean(epoch_times),
                    np.std(epoch_times),
                ))
Code example #16
def test(cfg):
    """
    Perform multi-view testing on the pretrained video model.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Set up environment.
    du.init_distributed_training(cfg)
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging(cfg.OUTPUT_DIR)

    # Print config.
    logger.info("Test with config:")
    logger.info(cfg)

    # Build the video model and print model statistics.
    model = build_model(cfg)
    if du.is_master_proc() and cfg.LOG_MODEL_INFO:
        misc.log_model_info(model, cfg, is_train=False)

    # Load a checkpoint to test if applicable.
    if cfg.TEST.CHECKPOINT_FILE_PATH != "":
        cu.load_checkpoint(
            cfg.TEST.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            None,
            inflation=False,
            convert_from_caffe2=cfg.TEST.CHECKPOINT_TYPE == "caffe2",
        )
    elif cu.has_checkpoint(cfg.OUTPUT_DIR):
        last_checkpoint = cu.get_last_checkpoint(cfg.OUTPUT_DIR)
        cu.load_checkpoint(last_checkpoint, model, cfg.NUM_GPUS > 1)
    elif cfg.TRAIN.CHECKPOINT_FILE_PATH != "":
        # If no checkpoint found in TEST.CHECKPOINT_FILE_PATH or in the current
        # checkpoint folder, try to load checkpoint from
        # TRAIN.CHECKPOINT_FILE_PATH and test it.
        cu.load_checkpoint(
            cfg.TRAIN.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            None,
            inflation=False,
            convert_from_caffe2=cfg.TRAIN.CHECKPOINT_TYPE == "caffe2",
        )
    else:
        # raise NotImplementedError("Unknown way to load checkpoint.")
        logger.info("Testing with random initialization. Only for debugging.")

    # Create video testing loaders.
    test_loader = loader.construct_loader(cfg, "test")
    logger.info("Testing model for {} iterations".format(len(test_loader)))

    assert (len(test_loader.dataset) %
            (cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS) == 0)
    # Create meters for multi-view testing.
    test_meter = TestMeter(
        len(test_loader.dataset) //
        (cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS),
        cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS,
        cfg.MODEL.NUM_CLASSES,
        len(test_loader),
        cfg.DATA.MULTI_LABEL,
        cfg.DATA.ENSEMBLE_METHOD,
    )

    # Set up writer for logging to Tensorboard format.
    if cfg.TENSORBOARD.ENABLE and du.is_master_proc(
            cfg.NUM_GPUS * cfg.NUM_SHARDS):
        writer = tb.TensorboardWriter(cfg)
    else:
        writer = None

    # Perform multi-view test on the entire dataset.
    perform_test(test_loader, model, test_meter, cfg, writer)
    if writer is not None:
        writer.close()
Code example #17
def visualize(cfg):
    """
    Visualize examples from the training set: de-normalize the frames and prepare the corresponding masks for inspection.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """

    # Setup logging format.
    logging.setup_logging(cfg)

    # Print config.
    logger.info("Train with config:")
    # logger.info(pprint.pformat(cfg))

    # # Build the video model and print model statistics.
    # model = build_model(cfg)
    # if du.is_master_proc():
    #     misc.log_model_info(model, cfg, is_train=True)

    # Create the video train and val loaders.
    # train_loader = loader.construct_loader(cfg, "train")
    # val_loader = loader.construct_loader(cfg, "val")

    train_set = build_dataset(cfg.TEST.DATASET, cfg, "train")

    for i in np.random.choice(len(train_set), 100):
        i = 14693
        # frames, label, _, _ = train_set.get_augmented_examples(i)
        logger.info(i)
        frames, label, _, meta = train_set[i]

        #logger.info(frames[0].shape)
        logger.info('done')

        # frames = frames[0].permute(0,2,3,4,1)
        frames = frames[0].transpose(0, 1)  #.permute(1,2,3,0)
        # logger.info('### Z score ##########')
        # logger.info('min')
        # logger.info(frames.min())
        # logger.info('max')
        # logger.info(frames.max())
        # logger.info('mean')
        # logger.info(frames.mean())
        # logger.info('var')
        # logger.info(frames.var())

        frames = frames * torch.tensor(
            cfg.DATA.STD)[None, :, None, None]  #[None,:,None,None,None]
        frames = frames + torch.tensor(
            cfg.DATA.MEAN)[None, :, None, None]  #[None,:,None,None,None]

        # logger.info('### normal ##########')
        # logger.info('min')
        # logger.info(frames.min())
        # logger.info('max')
        # logger.info(frames.max())
        # logger.info('mean')
        # logger.info(frames.mean())
        # logger.info('var')
        # logger.info(frames.var())
        masks = meta['masks']
        masks = torch.cat([masks] * 3, 0).transpose(0, 1) / masks.max()
        frames = frames / frames.max()
Code example #18
File: demo_net.py  Project: VandanaAgarwal/SlowFast
def run_demo(cfg, frame_provider):
    """
    Run demo visualization.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
        frame_provider (iterator): Python iterator that returns task objects that are filled
            with necessary information such as `frames`, `id` and `num_buffer_frames` for the
            prediction and visualization pipeline.
    """
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)
    # Setup logging format.
    logging.setup_logging(cfg.OUTPUT_DIR)
    # Print config.
    logger.info("Run demo with config:")
    logger.info(cfg)
    common_classes = (cfg.DEMO.COMMON_CLASS_NAMES
                      if len(cfg.DEMO.LABEL_FILE_PATH) != 0 else None)

    video_vis = VideoVisualizer(
        num_classes=cfg.MODEL.NUM_CLASSES,
        class_names_path=cfg.DEMO.LABEL_FILE_PATH,
        top_k=cfg.TENSORBOARD.MODEL_VIS.TOPK_PREDS,
        thres=cfg.DEMO.COMMON_CLASS_THRES,
        lower_thres=cfg.DEMO.UNCOMMON_CLASS_THRES,
        common_class_names=common_classes,
        colormap=cfg.TENSORBOARD.MODEL_VIS.COLORMAP,
        mode=cfg.DEMO.VIS_MODE,
    )

    # VA edits begin
    #async_vis = AsyncVis(video_vis, n_workers=cfg.DEMO.NUM_VIS_INSTANCES)
    async_vis = AsyncVis(video_vis,
                         n_workers=cfg.DEMO.NUM_VIS_INSTANCES,
                         label_filepath=cfg.DEMO.LABEL_FILE_PATH)
    # VA edits end

    if cfg.NUM_GPUS <= 1:
        model = ActionPredictor(cfg=cfg, async_vis=async_vis)
    else:
        model = AsyncDemo(cfg=cfg, async_vis=async_vis)

    seq_len = cfg.DATA.NUM_FRAMES * cfg.DATA.SAMPLING_RATE

    assert (cfg.DEMO.BUFFER_SIZE <= seq_len //
            2), "Buffer size cannot be greater than half of sequence length."
    num_task = 0
    # Start reading frames.
    frame_provider.start()
    for able_to_read, task in frame_provider:
        if not able_to_read:
            break
        if task is None:
            time.sleep(0.02)
            continue
        num_task += 1

        model.put(task)
        try:
            task = model.get()
            num_task -= 1
            yield task
        except IndexError:
            continue

    while num_task != 0:
        try:
            task = model.get()
            num_task -= 1
            yield task
        except IndexError:
            continue
Code example #19
    def __init__(self, cfg, progress_callback):

        # Set up environment.
        setup_environment()
        # Setup logging format
        logging.setup_logging(cfg.OUTPUT_DIR)

        logger.info("Demo with config:")
        logger.info(pprint.pformat(cfg))

        # Prepare the input video for best demo results
        cfg.DEMO.VIDEO_SOURCE_PATH_AT_FPS = self.create_demo_video_at_target_framerate(
            cfg.DEMO.VIDEO_SOURCE_PATH, cfg.CUSTOM_DATASET.FRAME_RATE)

        self.cfg = cfg

        # An output folder for all demo-related output
        output_datetime = datetime.datetime.now().strftime("%Y-%m-%d_%H_%M_%S")
        self.cfg.DEMO.OUTPUT_FOLDER = os.path.join(
            self.cfg.CUSTOM_DATASET.DEMO_DIR, output_datetime)
        create_folder(self.cfg.DEMO.OUTPUT_FOLDER)
        logger.info("Created output-folder for demo results at: " +
                    self.cfg.DEMO.OUTPUT_FOLDER)

        # (pyqtSignal) used for signaling back the progress for the GUI
        # We currently take the progress as the percentage of distributed images
        self.progress_callback = progress_callback

        # Used for extracting the data frames from the video file
        self.file_video_stream = FileVideoStream(
            self.cfg.DEMO.VIDEO_SOURCE_PATH_AT_FPS)
        self.video_file_name = Path(self.cfg.DEMO.VIDEO_SOURCE_PATH).stem

        # Whether we display our results
        self.use_video_visualizer = self.cfg.DEMO.VIDEO_SHOW_VIDEO_ENABLE or self.cfg.DEMO.VIDEO_EXPORT_VIDEO_ENABLE

        # Whether we export our output
        self.export_output = self.cfg.DEMO.EXPORT_EXPORT_RESULTS

        # The fps of the video source
        self.frames_per_second = self.file_video_stream.frames_per_second
        self.video_length_seconds = self.file_video_stream.video_length_seconds

        # Information on the sampling requirements for the
        # video data
        self.sample_rate = self.cfg.DATA.SAMPLING_RATE
        self.num_frames = self.cfg.DATA.NUM_FRAMES
        self.seq_len = self.sample_rate * self.num_frames
        self.half_seq_len = int(self.seq_len / 2)
        self.half_seq_len_seconds = self.half_seq_len / self.frames_per_second

        # The seconds in the video that are suited for inference
        self.earliest_full_start_second = np.math.ceil(
            self.half_seq_len_seconds)
        self.final_full_second = math.floor(
            self.video_length_seconds) - math.ceil(self.half_seq_len_seconds)
        # Set the current_second to start. The current second is the second for which we make the prediction
        self.current_video_second = self.earliest_full_start_second

        # Used for telling the GUI the progress of our distribute-images function over [0, final_full_second] seconds
        self.number_of_relevant_frames = (self.final_full_second +
                                          1) * self.frames_per_second

        # The corresponding frame index to any middle_frame_timestamp of interest
        self.first_middle_frame_index = sec_to_frame(
            self.earliest_full_start_second, self.cfg, mode="demo") - 1
        # Used to determine whether an index is a middle frame index for which action recognition is done
        self.current_middle_frame_index = self.first_middle_frame_index

        # The inference frame indices are sampled around the middle frame as defined for slowfast
        # when using ava_dataset.
        # Here we have indices. index = frame number - 1
        self.inference_frame_indices = list(
            range(self.current_middle_frame_index + 1 - self.half_seq_len,
                  self.current_middle_frame_index + 1 + self.half_seq_len,
                  self.sample_rate))
        # Indicates whether the main process should put the next image in the input_detection_queue
        self.next_image_in_relevant_range = self.current_video_second <= self.final_full_second

        # Multiprocessing configs:
        # How many cpus we have
        self.num_cpu = mp.cpu_count()

        # We have 5 processes in parallel in the simplest case of the demo
        # 1. Main, 2. Object Predictor, 3. Deep Sort Tracker, 4. Video Visualizer, 5. Action Recognizer
        self.num_occupied_processes = 5

        assert self.num_cpu >= self.num_occupied_processes, "You need at least " + str(
            self.num_occupied_processes
        ) + " cores for the multiprocessing demo"

        self.free_cpu_cores = self.num_cpu - self.num_occupied_processes
        # How many gpus we have for the demo
        self.num_gpu = self.cfg.NUM_GPUS

        # How many gpus should be used for object detection (increasing number)
        self.num_gpu_object_detection = min(self.free_cpu_cores, self.num_gpu)

        # The gpu id for action recognition (decreasing, or in our case the last gpu id).
        # We take the last possible gpu id for action recognition because this is beneficial
        # if we have fewer processes than free_cpu_cores (object detection and action
        # recognition are separated this way).
        self.gpuid_action_recognition = self.num_gpu - 1

        # The queue sizes as specified in the config files
        self.queue_size = self.cfg.DEMO.QSIZE_SECONDS * self.cfg.CUSTOM_DATASET.FRAME_RATE

        # Queues
        # Contains the original images with an idx each:
        #   1. img_idx (int)
        #   2. image of shape (H, W, C) (in BGR order) and [0,255])
        self.input_detection_queue = mp.Queue(maxsize=self.queue_size)
        # Queue containing the detections per image in form
        #   1. img_idx (int),
        #   2. image of shape (H, W, C) (in BGR order) and [0,255]),
        #   3. predictions {dict}: a dict with the following keys
        #       pred_boxes: tensor of shape num_predictions, 4 =
        #                   the coordinates of the predicted boxes [x1, y1, x2, y2]) --> if empty it is []
        #       scores: tensor of shape (num_predictions) containing the confidence scores [0,1]) --> if empty it is []
        self.output_detection_queue = mp.Queue(maxsize=self.queue_size)
        # Contains the images with the corresponding ids and person_tracking_outputs -> used for visualization
        #   1. img_idx (int)
        #   2. image of shape (H, W, C) (in BGR order) and [0,255])
        #   3. person_tracking_outputs: ndarray with shape (num_identities, 5(int)= x1,y1,x2,y2,identity_number)
        #                          --> if empty it is a list []
        self.output_tracker_queue_visualization = mp.Queue(
            maxsize=self.queue_size)

        # Contains the images with the corresponding ids and person_tracking_outputs -> used for action recognition
        #   1. img_idx (int)
        #   2. person_tracking_outputs: ndarray with shape (num_identities, 5(int)= x1,y1,x2,y2,identity_number)
        #                          --> if empty it is a list []
        self.output_tracker_queue_action_recognition = mp.Queue(
            maxsize=self.queue_size)

        # Contains the input for action_recognition (only for img_idxs that are middle_frames)
        #   1. current_video_second: (int) the current video second for which the prediction data is given
        #   2. img_idxs=current_middle_frame_index (int) the image img_idx, which is always the next middle_frame_index
        #   3. img_idx (int) = the idx of the current middle_frame
        #   4. image of shape (H, W, C) (in BGR order) and [0,255])
        # It is bigger than the other queues
        self.input_action_recognition_queue = mp.Queue(
            maxsize=int(self.queue_size * 1.5))

        # Contains the input for action_recognition (only for img_idxs that are middle_frames)
        #   1. img_idx (int), only for middle frames
        #   2. person_tracking_outputs: ndarray with shape (num_identities, 5(int)= x1,y1,x2,y2,identity_number)
        #                          --> if empty it is a list []
        #   3. pred_action_category_scores (ndarray float32) shape(num_person_ids, num_categories),
        #                                       the scores for each person and each action category
        #                                   --> if empty it is a list []
        self.output_action_recognition_queue_visualization = mp.Queue(
            maxsize=self.queue_size)

        # Contains the input for action_recognition (only for img_idxs that are middle_frames)
        #   1. current_video_second: (int) the current video second for which the prediction data is given
        #   2. person_tracking_outputs: ndarray with shape (num_identities, 5(int)= x1,y1,x2,y2,identity_number)
        #                          --> if empty it is a list []
        #   3. pred_action_category_scores (ndarray float32) shape(num_person_ids, num_categories),
        #                                       the scores for each person and each action category
        #                                   --> if empty it is a list []
        self.output_action_recognition_queue_result_export = mp.Queue(
            maxsize=int(self.video_length_seconds * self.frames_per_second))

        # A list of dicts that contains detected middle_frame_seconds
        self.middle_frame_seconds = []

        # The detectron2_object_predictor_class for person detection
        self.object_predictor = DemoDetectron2ObjectPredictor(
            self.cfg,
            self.file_video_stream.height,
            self.file_video_stream.width,
            parallel=True,
            num_gpu=self.num_gpu_object_detection,
            input_queue=self.input_detection_queue,
            output_queue=self.output_detection_queue,
            gpuid_action_recognition=self.gpuid_action_recognition)

        # The deep sort tracker class for person tracking
        self.deep_sort_tracker = DeepSortTracker(
            self.cfg,
            input_queue=self.output_detection_queue,
            output_queue_vis=self.output_tracker_queue_visualization,
            output_queue_action_pred=self.
            output_tracker_queue_action_recognition,
            show_video=self.use_video_visualizer)

        # The action recognition class
        self.action_recognizer = ActionRecognizer(
            self.cfg,
            self.file_video_stream.height,
            self.file_video_stream.width,
            model_device=self.gpuid_action_recognition,
            first_middle_frame_index=self.first_middle_frame_index,
            sample_rate=self.sample_rate,
            half_seq_len=self.half_seq_len,
            current_video_second=self.current_video_second,
            input_queue_tracker=self.output_tracker_queue_action_recognition,
            input_queue_images=self.input_action_recognition_queue,
            output_queue=self.output_action_recognition_queue_visualization,
            output_action_recognition_queue_result_export=self.
            output_action_recognition_queue_result_export)

        if self.export_output:
            # Our demo meter to store and finally print the results
            self.demo_meter = DemoMeter(self.cfg,
                                        self.file_video_stream.height,
                                        self.file_video_stream.width)
            # Used to control the completeness of our export
            self.current_export_second = self.earliest_full_start_second - 1

        if self.use_video_visualizer:
            self.demo_visualizer = VideoVisualizer(
                self.cfg,
                self.file_video_stream.height,
                self.first_middle_frame_index,
                self.frames_per_second,
                input_detection_queue=self.input_detection_queue,
                output_detection_queue=self.output_detection_queue,
                output_tracker_queue_visualization=self.
                output_tracker_queue_visualization,
                output_tracker_queue_action_recognition=self.
                output_tracker_queue_action_recognition,
                input_action_recognition_queue=self.
                input_action_recognition_queue,
                output_action_recognition_queue_visualization=self.
                output_action_recognition_queue_visualization,
                output_action_recognition_queue_result_export=self.
                output_action_recognition_queue_result_export)
Code example #20
File: train_net_des.py  Project: gabrielsluz/SlowFast
def test_implementation_des(cfg):
    """
    Simulates a train and a val epoch to check whether the gradients are being
    updated and the metrics are being calculated correctly.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging(cfg.OUTPUT_DIR)
    # Print config.
    logger.info("Test implementation")

    # Build the video model and print model statistics.
    model = build_clevrer_model(cfg)

    # Construct the optimizer.
    optimizer = optim.construct_optimizer(model, cfg)

    start_epoch = cu.load_train_checkpoint(cfg, model, optimizer)

    # Create the video train and val loaders.
    if cfg.TRAIN.DATASET != 'Clevrer_des':
        print("This train script does not support your dataset: -{}-. Only Clevrer_des".format(cfg.TRAIN.DATASET))
        exit()
    
    train_loader = build_dataloader(cfg, "train")
    val_loader = build_dataloader(cfg, "val")

    # Create meters.
    train_meter = ClevrerTrainMeter(len(train_loader), cfg)
    val_meter = ClevrerValMeter(len(val_loader), cfg)

    # Perform the training loop.
    logger.info("Start epoch: {}".format(start_epoch + 1))
    # Train for one epoch.
    model_before = copy.deepcopy(model)
    cur_epoch = start_epoch
    train_epoch(
        train_loader, model, optimizer, train_meter, cur_epoch, cfg, test_imp=True
    )
    print("Check how much parameters changed")
    for (p_b_name, p_b), (p_name, p) in zip(model_before.named_parameters(), model.named_parameters()):
        if p.requires_grad:
            print("Parameter requires grad:")
            print(p_name, p_b_name)
            #Calculate ratio of change
            change = torch.abs(torch.norm(p) - torch.norm(p_b))
            print("Ratio of change = {}".format(torch.true_divide(change, torch.norm(p_b))))
            if (p_b != p).any():
                print("--Check--")
            else:
                print("ALERT - WEIGHTS DID NOT CHANGE WITH TRAINING.")
        else:
            print("Parameter does not require grad:")
            print(p_name)
            print(p)
    print("Val epoch")
    eval_epoch(val_loader, model, val_meter, cur_epoch, cfg, test_imp=True)
Code example #21
File: train_s_net.py  Project: serre-lab/pred_gn
def train(cfg):
    """
    Train a video model for many epochs on train set and evaluate it on val set.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """


    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging(cfg)

    # Print config.
    logger.info("Train with config:")
    logger.info(pprint.pformat(cfg))

    if du.get_rank()==0 and du.is_master_proc(num_gpus=cfg.NUM_GPUS):
        writer = SummaryWriter(log_dir=cfg.OUTPUT_DIR)

    else:
        writer = None

    if du.get_rank()==0 and du.is_master_proc(num_gpus=cfg.NUM_GPUS) and not cfg.DEBUG:
        tags = []
        if 'TAGS' in cfg and cfg.TAGS !=[]:
            tags=list(cfg.TAGS)
        neptune.set_project('Serre-Lab/motion')

        ######################
        overrides = sys.argv[1:]

        overrides_dict = {}
        for i in range(len(overrides)//2):
            overrides_dict[overrides[2*i]] = overrides[2*i+1]
        overrides_dict['dir'] = cfg.OUTPUT_DIR
        ######################


        if 'NEP_ID' in cfg and cfg.NEP_ID != "":
            session = Session()
            project = session.get_project(project_qualified_name='Serre-Lab/motion')
            nep_experiment = project.get_experiments(id=cfg.NEP_ID)[0]

        else:
            nep_experiment = neptune.create_experiment (name=cfg.NAME,
                                        params=overrides_dict,
                                        tags=tags)
    else:
        nep_experiment=None

    # Build the video model and print model statistics.
    model = build_model(cfg)
    if du.is_master_proc(num_gpus=cfg.NUM_GPUS):
        misc.log_model_info(model, cfg, is_train=True)

    # Construct the optimizer.
    optimizer = optim.construct_optimizer(model, cfg)

    # Load a checkpoint to resume training if applicable.
    if cfg.TRAIN.AUTO_RESUME and cu.has_checkpoint(cfg.OUTPUT_DIR):
        logger.info("Load from last checkpoint.")
        last_checkpoint = cu.get_last_checkpoint(cfg.OUTPUT_DIR)
        checkpoint_epoch = cu.load_checkpoint(
            last_checkpoint, model, cfg.NUM_GPUS > 1, optimizer
        )
        start_epoch = checkpoint_epoch + 1
    elif cfg.TRAIN.CHECKPOINT_FILE_PATH != "":
        logger.info("Load from given checkpoint file.")
        checkpoint_epoch = cu.load_checkpoint(
            cfg.TRAIN.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            optimizer,
            inflation=cfg.TRAIN.CHECKPOINT_INFLATE,
            convert_from_caffe2=cfg.TRAIN.CHECKPOINT_TYPE == "caffe2",
        )
        start_epoch = checkpoint_epoch + 1
    else:
        start_epoch = 0

    # Create the video train and val loaders.
    train_loader = loader.construct_loader(cfg, "train")
    val_loader = loader.construct_loader(cfg, "val")

    # Create meters.
    if cfg.DETECTION.ENABLE:
        train_meter = AVAMeter(len(train_loader), cfg, mode="train")
        val_meter = AVAMeter(len(val_loader), cfg, mode="val")
    else:
        train_meter = TrainMeter(len(train_loader), cfg)
        val_meter = ValMeter(len(val_loader), cfg)

    # Perform the training loop.
    logger.info("Start epoch: {}".format(start_epoch + 1))

    for cur_epoch in range(start_epoch, cfg.SOLVER.MAX_EPOCH):
        # Shuffle the dataset.
        loader.shuffle_dataset(train_loader, cur_epoch)
        # Train for one epoch.
        train_epoch(train_loader, model, optimizer, train_meter, cur_epoch, writer, nep_experiment, cfg)

        # Compute precise BN stats.
        # if cfg.BN.USE_PRECISE_STATS and len(get_bn_modules(model)) > 0:
        #     calculate_and_update_precise_bn(
        #         train_loader, model, cfg.BN.NUM_BATCHES_PRECISE
        #     )

        # Save a checkpoint.
        if cu.is_checkpoint_epoch(cur_epoch, cfg.TRAIN.CHECKPOINT_PERIOD):
            cu.save_checkpoint(cfg.OUTPUT_DIR, model, optimizer, cur_epoch, cfg)
        # Evaluate the model on validation set.
        if misc.is_eval_epoch(cfg, cur_epoch):
            eval_epoch(val_loader, model, val_meter, cur_epoch, nep_experiment, cfg)

        if du.get_rank()==0 and du.is_master_proc(num_gpus=cfg.NUM_GPUS) and not cfg.DEBUG:
            nep_experiment.log_metric('epoch', cur_epoch)
Code example #22
    def get_predictions(self):
        """
        Predict and append prediction results to each box in each keyframe in
        `self.pred_boxes` dictionary.
        """
        # Set random seed from configs.
        np.random.seed(self.cfg.RNG_SEED)
        torch.manual_seed(self.cfg.RNG_SEED)

        # Setup logging format.
        logging.setup_logging(self.cfg.OUTPUT_DIR)

        # Print config.
        logger.info("Run demo with config:")
        logger.info(self.cfg)
        assert (self.cfg.NUM_GPUS <=
                1), "Cannot run demo visualization on multiple GPUs."

        # Build the video model and print model statistics.
        model = build_model(self.cfg)
        model.eval()
        logger.info("Start loading model info")
        misc.log_model_info(model, self.cfg, use_train_input=False)
        logger.info("Start loading model weights")
        cu.load_test_checkpoint(self.cfg, model)
        logger.info("Finish loading model weights")
        logger.info("Start making predictions for precomputed boxes.")
        for keyframe_idx, boxes_and_labels in tqdm.tqdm(
                self.pred_boxes.items()):
            inputs = self.get_input_clip(keyframe_idx)
            boxes = boxes_and_labels[0]
            boxes = torch.from_numpy(np.array(boxes)).float()

            box_transformed = scale_boxes(
                self.cfg.DATA.TEST_CROP_SIZE,
                boxes,
                self.display_height,
                self.display_width,
            )

            # Pad frame index for each box.
            box_inputs = torch.cat(
                [
                    torch.full((box_transformed.shape[0], 1), float(0)),
                    box_transformed,
                ],
                axis=1,
            )
            if self.cfg.NUM_GPUS:
                # Transfer the data to the current GPU device.
                if isinstance(inputs, (list, )):
                    for i in range(len(inputs)):
                        inputs[i] = inputs[i].cuda(non_blocking=True)
                else:
                    inputs = inputs.cuda(non_blocking=True)

                box_inputs = box_inputs.cuda()

            preds = model(inputs, box_inputs)

            preds = preds.detach()

            if self.cfg.NUM_GPUS:
                preds = preds.cpu()

            boxes_and_labels[1] = preds
Code example #23
def test(cfg):
    """
    Perform multi-view testing on the pretrained video model.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging()

    # Print config.
    logger.info("Test with config:")
    logger.info(cfg)

    # Build the video model and print model statistics.
    model = model_builder.build_model(cfg)
    if du.is_master_proc():
        misc.log_model_info(model)

    # Load a checkpoint to test if applicable.
    if cfg.TEST.CHECKPOINT_FILE_PATH != "":
        cu.load_checkpoint(
            cfg.TEST.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            None,
            inflation=False,
            convert_from_caffe2=cfg.TEST.CHECKPOINT_TYPE == "caffe2",
        )
    elif cu.has_checkpoint(cfg.OUTPUT_DIR):
        last_checkpoint = cu.get_last_checkpoint(cfg.OUTPUT_DIR)
        cu.load_checkpoint(last_checkpoint, model, cfg.NUM_GPUS > 1)
    elif cfg.TRAIN.CHECKPOINT_FILE_PATH != "":
        # If no checkpoint found in TEST.CHECKPOINT_FILE_PATH or in the current
        # checkpoint folder, try to load checkpoint from
        # TRAIN.CHECKPOINT_FILE_PATH and test it.
        cu.load_checkpoint(
            cfg.TRAIN.CHECKPOINT_FILE_PATH,
            model,
            cfg.NUM_GPUS > 1,
            None,
            inflation=False,
            convert_from_caffe2=cfg.TRAIN.CHECKPOINT_TYPE == "caffe2",
        )
    else:
        # raise NotImplementedError("Unknown way to load checkpoint.")
        logger.info("Testing with random initialization. Only for debugging.")

    # Create video testing loaders.
    test_loader = loader.construct_loader(cfg, "test")
    logger.info("Testing model for {} iterations".format(len(test_loader)))

    if cfg.DETECTION.ENABLE:
        assert cfg.NUM_GPUS == cfg.TEST.BATCH_SIZE
        test_meter = AVAMeter(len(test_loader), cfg, mode="test")
    else:
        assert (
            len(test_loader.dataset)
            % (cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS)
            == 0
        )
        # Create meters for multi-view testing.
        test_meter = TestMeter(
            len(test_loader.dataset)
            // (cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS),
            cfg.TEST.NUM_ENSEMBLE_VIEWS * cfg.TEST.NUM_SPATIAL_CROPS,
            cfg.MODEL.NUM_CLASSES,
            len(test_loader),
        )

    # Perform multi-view test on the entire dataset.
    perform_test(test_loader, model, test_meter, cfg)
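
The divisibility assertion above reflects the multi-view layout: the test dataset enumerates every (video, clip) pair, so its length must factor into unique videos times clips per video. A small arithmetic sketch with assumed example values:

num_ensemble_views = 10                                # temporal clips per video
num_spatial_crops = 3                                  # spatial crops per clip
num_clips = num_ensemble_views * num_spatial_crops     # 30 views per video
dataset_size = 3000                                    # assumed len(test_loader.dataset)
assert dataset_size % num_clips == 0
num_videos = dataset_size // num_clips                 # 100 videos scored by TestMeter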
Code example #24
    def __init__(self,
                 cfg,
                 img_height,
                 first_middle_frame_index,
                 frames_per_second,
                 input_detection_queue=None,
                 output_detection_queue=None,
                 output_tracker_queue_visualization=None,
                 output_tracker_queue_action_recognition=None,
                 input_action_recognition_queue=None,
                 output_action_recognition_queue_visualization=None,
                 output_action_recognition_queue_result_export=None):
        """
        Initialize the object
        :param cfg: our demo config
        :param img_height: (int) the height of the image
        :param first_middle_frame_index: (int) the index of the first middle frame
        :param frames_per_second: (float) the fps of the video -> required for determining middle frames
        :param input_detection_queue: please refer to class MultiProcessDemo
        :param output_detection_queue: please refer to class MultiProcessDemo
        :param output_tracker_queue_visualization: please refer to class MultiProcessDemo
        :param output_tracker_queue_action_recognition: please refer to class MultiProcessDemo
        :param input_action_recognition_queue: please refer to class MultiProcessDemo
        :param output_action_recognition_queue_visualization: please refer to class MultiProcessDemo
        :param output_action_recognition_queue_result_export: please refer to class MultiProcessDemo
        """

        setup_environment()
        # Setup logging format
        logging.setup_logging(cfg.OUTPUT_DIR)

        self.cfg = cfg

        # The name of the input video
        self.demo_video_name = Path(self.cfg.DEMO.VIDEO_SOURCE_PATH).stem

        # Whether we will export a video
        self.export_video = self.cfg.DEMO.VIDEO_EXPORT_VIDEO_ENABLE

        if self.export_video:
            # number of digits for exporting the images (determines how many images can be stored)
            self.number_of_digits_for_image_export = 10
            # The path of the video to be created
            self.export_video_path = os.path.join(
                self.cfg.DEMO.OUTPUT_FOLDER,
                self.demo_video_name + "_annotated.mp4")

        # Whether we will display the video
        self.display_video = self.cfg.DEMO.VIDEO_SHOW_VIDEO_ENABLE

        self.cv2_display_name = "Demo: " + self.demo_video_name

        # Whether we will display the meta information (Queues Sizes and img idx)
        self.display_meta_info = cfg.DEMO.VIDEO_SHOW_VIDEO_DEBUGGING_INFO
        # Used for finding the position of meta info
        self.img_height = img_height
        # Used for determining middle_frame_indices (they have the action prediction)
        self.first_middle_frame_index = first_middle_frame_index
        self.frames_per_second = frames_per_second

        # Additional options for displaying the video
        self.video_display_scaling_factor = cfg.DEMO.VIDEO_DISPLAY_SCALING_FACTOR
        self.video_action_display_duration_milliseconds = cfg.DEMO.VIDEO_ACTION_DISPLAY_DURATION_MILLISECONDS

        # The queues containing relevant information
        self.input_detection_queue = input_detection_queue
        self.output_detection_queue = output_detection_queue
        self.output_tracker_queue_visualization = output_tracker_queue_visualization
        self.output_tracker_queue_action_recognition = output_tracker_queue_action_recognition
        self.input_action_recognition_queue = input_action_recognition_queue
        self.output_action_recognition_queue_visualization = output_action_recognition_queue_visualization
        self.output_action_recognition_queue_result_export = output_action_recognition_queue_result_export
        # The queue sizes as specified in the config files
        self.queue_size = self.cfg.DEMO.QSIZE_SECONDS * self.cfg.CUSTOM_DATASET.FRAME_RATE

        # Used for terminating the process successfully
        self.action_recognition_input_finished = False

        # The information for displaying actions
        # Load the categories:
        self.path_to_label_map_file = os.path.join(cfg.CUSTOM_DATASET.ANNOTATION_DIR, cfg.CUSTOM_DATASET.LABEL_MAP_FILE) \
            if not os.path.isfile(cfg.ACTIONRECOGNIZER.LABEL_MAP_FILE) \
            else cfg.ACTIONRECOGNIZER.LABEL_MAP_FILE
        # List of dicts (id, name)
        self.action_categories, _ = read_labelmap(self.path_to_label_map_file)
        # A color value for every category
        self.palette_actions = np.random.randint(
            64, 128, (len(self.action_categories), 3)).tolist()

        # The information required for displaying person_tracking info
        self.palette_person_ids = (2**11 - 1, 2**15 - 1, 2**20 - 1)

        # The process for displaying and/or exporting frames
        self.display_next_frame_process = mp.Process(
            target=self.display_and_or_export_next_frame, args=())

        # Used to test the correct order of images
        self.display_img_idx = -1

        # The information for action info display
        self.current_action_output_img_idx = ""
        self.current_pred_action_category_scores = ""
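
A hypothetical helper, not part of the class above, showing one way a frame index could be classified as a "middle frame" from first_middle_frame_index and frames_per_second, assuming one action prediction per second of video:

def is_middle_frame(frame_idx, first_middle_frame_index, frames_per_second):
    # Assumed rule: middle frames recur once per second, starting at the
    # first middle frame of the video.
    if frame_idx < first_middle_frame_index:
        return False
    return (frame_idx - first_middle_frame_index) % round(frames_per_second) == 0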
Code example #25
def visualize(cfg):
    """
    Perform layer weights and activations visualization on the model.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    if cfg.TENSORBOARD.ENABLE and (cfg.TENSORBOARD.MODEL_VIS.ENABLE
                                   or cfg.TENSORBOARD.WRONG_PRED_VIS.ENABLE):
        # Set up environment.
        du.init_distributed_training(cfg)
        # Set random seed from configs.
        np.random.seed(cfg.RNG_SEED)
        torch.manual_seed(cfg.RNG_SEED)

        # Setup logging format.
        logging.setup_logging(cfg.OUTPUT_DIR)

        # Print config.
        logger.info("Model Visualization with config:")
        logger.info(cfg)

        # Build the video model and print model statistics.
        model = build_model(cfg)
        model.eval()
        if du.is_master_proc() and cfg.LOG_MODEL_INFO:
            misc.log_model_info(model, cfg, use_train_input=False)

        cu.load_test_checkpoint(cfg, model)

        # Create video testing loaders.
        vis_loader = loader.construct_loader(cfg, "test")

        if cfg.DETECTION.ENABLE:
            assert cfg.NUM_GPUS == cfg.TEST.BATCH_SIZE or cfg.NUM_GPUS == 0

        # Set up writer for logging to Tensorboard format.
        if du.is_master_proc(cfg.NUM_GPUS * cfg.NUM_SHARDS):
            writer = tb.TensorboardWriter(cfg)
        else:
            writer = None
        if cfg.TENSORBOARD.PREDICTIONS_PATH != "":
            assert not cfg.DETECTION.ENABLE, "Detection is not supported."
            logger.info(
                "Visualizing class-level performance from saved results...")
            if writer is not None:
                with g_pathmgr.open(cfg.TENSORBOARD.PREDICTIONS_PATH,
                                    "rb") as f:
                    preds, labels = pickle.load(f, encoding="latin1")

                writer.plot_eval(preds, labels)

        if cfg.TENSORBOARD.MODEL_VIS.ENABLE:
            if cfg.TENSORBOARD.MODEL_VIS.GRAD_CAM.ENABLE:
                assert (
                    not cfg.DETECTION.ENABLE
                ), "Detection task is currently not supported for Grad-CAM visualization."
                if cfg.MODEL.ARCH in cfg.MODEL.SINGLE_PATHWAY_ARCH:
                    assert (
                        len(cfg.TENSORBOARD.MODEL_VIS.GRAD_CAM.LAYER_LIST) == 1
                    ), "The number of chosen CNN layers must be equal to the number of pathway(s), given {} layer(s).".format(
                        len(cfg.TENSORBOARD.MODEL_VIS.GRAD_CAM.LAYER_LIST))
                elif cfg.MODEL.ARCH in cfg.MODEL.MULTI_PATHWAY_ARCH:
                    assert (
                        len(cfg.TENSORBOARD.MODEL_VIS.GRAD_CAM.LAYER_LIST) == 2
                    ), "The number of chosen CNN layers must be equal to the number of pathway(s), given {} layer(s).".format(
                        len(cfg.TENSORBOARD.MODEL_VIS.GRAD_CAM.LAYER_LIST))
                else:
                    raise NotImplementedError(
                        "Model arch {} is not in {}".format(
                            cfg.MODEL.ARCH,
                            cfg.MODEL.SINGLE_PATHWAY_ARCH +
                            cfg.MODEL.MULTI_PATHWAY_ARCH,
                        ))
            logger.info("Visualize model analysis for {} iterations".format(
                len(vis_loader)))
            # Run visualization on the model
            run_visualization(vis_loader, model, cfg, writer)
        if cfg.TENSORBOARD.WRONG_PRED_VIS.ENABLE:
            logger.info("Visualize Wrong Predictions for {} iterations".format(
                len(vis_loader)))
            perform_wrong_prediction_vis(vis_loader, model, cfg)

        if writer is not None:
            writer.close()
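
A minimal sketch of the Grad-CAM layer-count rule enforced above: one target layer per pathway. The architecture and layer names below are placeholders, not values read from the config.

single_pathway_arch = ["c2d", "i3d", "slow"]              # placeholder names
multi_pathway_arch = ["slowfast"]                         # placeholder name
arch = "slowfast"
layer_list = ["s5/pathway0_res2", "s5/pathway1_res2"]     # placeholder layer names

num_pathways = 1 if arch in single_pathway_arch else 2
assert len(layer_list) == num_pathways, (
    "The number of chosen CNN layers must equal the number of pathways.")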
Code example #26
    def __init__(self,
                 cfg,
                 img_height,
                 img_width,
                 parallel=False,
                 num_gpu=None,
                 input_queue=None,
                 output_queue=None,
                 gpuid_action_recognition=None):
        """
        Creates a Detectron2-based prediction class
        which is optimized for the demo and should be used for it.
        The code is slightly modified from the original detectron2 demo content.
        :param cfg: the config file for the prototype
        :param img_height: (int) the height of the input images
        :param img_width: (int) the width of input images
        :param parallel: (boolean) whether we will do asynchronous computation
        :param num_gpu: (int) number of gpus we will use for asynchronous computation
        :param input_queue: (multiprocessing.queue) containing the input images
                            (img_idx, image of shape (H, W, C) (in BGR order) and [0,255])
        :param output_queue: (multiprocessing.queue) containing the computed predictions
        :param gpuid_action_recognition: (int) the gpu id used for action recognition

        """

        setup_environment()
        # Setup logging format
        logging.setup_logging(cfg.OUTPUT_DIR)

        # The cfg file for the prototype
        self.cfg = cfg

        # The original image resolution: used for resizing provided images
        self.img_height = img_height
        self.img_width = img_width

        # We only use the demo config
        self.detectron2_cfg_file = self.cfg.DETECTRON.DETECTION_MODEL_CFG
        self.detectron2_model_weights = self.cfg.DETECTRON.MODEL_WEIGHTS
        self.detectron2_score_tresh_test = self.cfg.DETECTRON.DEMO_PERSON_SCORE_THRESH

        # Load the detectron config
        self.detectron_config = self.setup_detectron_config()

        # Can be useful for displaying the object classes
        self.metadata = MetadataCatalog.get(
            self.detectron_config.DATASETS.TEST[0]
            if len(self.detectron_config.DATASETS.TEST) else "__unused")
        self.cpu_device = torch.device("cpu")

        # Determines whether we will use async processing
        self.parallel = parallel
        if self.parallel:
            # Used for async processing
            self.predictor = AsyncPredictor(
                self.cfg,
                self.detectron_config,
                self.img_height,
                self.img_width,
                num_gpus=num_gpu,
                input_queue=input_queue,
                output_queue=output_queue,
                gpuid_action_recognition=gpuid_action_recognition)
            # Used to count the frames provided for detect_persons
            self.provided_image_count = 0
            self.buffer_size = self.predictor.default_buffer_size
            # In the original version this attribute stored the images in
            # chronological order and also served as a counter representing the size of
            # the task_queue attribute. Since we do not return the images, we only use it
            # as a counter for the task_queue and insert a dummy int instead of an image,
            # which is more memory efficient.
            self.frame_data = deque()
        else:
            # Use the modified predictor for the demo
            self.predictor = DemoDefaultPredictor(self.cfg,
                                                  self.detectron_config,
                                                  self.img_height,
                                                  self.img_width)
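
A plausible sketch of what setup_detectron_config() above might do, assuming it mirrors the detectron2 calls used in the demo example further below; the real method body is not shown in this listing.

from detectron2 import model_zoo
from detectron2.config import get_cfg

def setup_detectron_config(cfg_file, model_weights, score_thresh_test):
    # Build a detectron2 config from a model-zoo config file, then override
    # the weights and the person-detection score threshold.
    d2_cfg = get_cfg()
    d2_cfg.merge_from_file(model_zoo.get_config_file(cfg_file))
    d2_cfg.MODEL.WEIGHTS = model_weights
    d2_cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = score_thresh_test
    d2_cfg.freeze()
    return d2_cfg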
Code example #27
def demo(cfg):
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging()

    # Print config.
    logger.info("Run demo with config:")
    logger.info(cfg)
    # Build the video model and print model statistics.
    model = model_builder.build_model(cfg)
    model.eval()
    misc.log_model_info(model)

    # Load a checkpoint to test if applicable.
    if cfg.TEST.CHECKPOINT_FILE_PATH != "":
        ckpt = cfg.TEST.CHECKPOINT_FILE_PATH
    elif cu.has_checkpoint(cfg.OUTPUT_DIR):
        ckpt = cu.get_last_checkpoint(cfg.OUTPUT_DIR)
    elif cfg.TRAIN.CHECKPOINT_FILE_PATH != "":
        # If no checkpoint found in TEST.CHECKPOINT_FILE_PATH or in the current
        # checkpoint folder, try to load checkpoint from
        # TRAIN.CHECKPOINT_FILE_PATH and test it.
        ckpt = cfg.TRAIN.CHECKPOINT_FILE_PATH
    else:
        raise NotImplementedError("Unknown way to load checkpoint.")

    cu.load_checkpoint(
        ckpt,
        model,
        cfg.NUM_GPUS > 1,
        None,
        inflation=False,
        convert_from_caffe2="caffe2"
        in [cfg.TEST.CHECKPOINT_TYPE, cfg.TRAIN.CHECKPOINT_TYPE],
    )

    if cfg.DETECTION.ENABLE:
        # Load object detector from detectron2
        dtron2_cfg_file = cfg.DEMO.DETECTRON2_OBJECT_DETECTION_MODEL_CFG
        dtron2_cfg = get_cfg()
        dtron2_cfg.merge_from_file(model_zoo.get_config_file(dtron2_cfg_file))
        dtron2_cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = .5
        dtron2_cfg.MODEL.WEIGHTS = cfg.DEMO.DETECTRON2_OBJECT_DETECTION_MODEL_WEIGHTS
        object_predictor = DefaultPredictor(dtron2_cfg)
        # Load the labels of AVA dataset
        with open(cfg.DEMO.LABEL_FILE_PATH) as f:
            labels = f.read().split('\n')[:-1]
        palette = np.random.randint(64, 128, (len(labels), 3)).tolist()
        boxes = []
    else:
        # Load the labels of Kinectics-400 dataset
        labels_df = pd.read_csv(cfg.DEMO.LABEL_FILE_PATH)
        labels = labels_df['name'].values

    frame_provider = VideoReader(cfg)
    seq_len = cfg.DATA.NUM_FRAMES * cfg.DATA.SAMPLING_RATE
    frames = []
    pred_labels = []
    s = 0.
    for able_to_read, frame in frame_provider:
        if not able_to_read:
            # When the end of the video is reached, clear the buffer and continue to the next one.
            frames = []
            continue

        if len(frames) != seq_len:
            frame_processed = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame_processed = scale(cfg.DATA.TEST_CROP_SIZE, frame_processed)
            frames.append(frame_processed)
            if cfg.DETECTION.ENABLE and len(frames) == seq_len // 2 - 1:
                mid_frame = frame

        if len(frames) == seq_len:
            start = time()
            if cfg.DETECTION.ENABLE:
                outputs = object_predictor(mid_frame)
                fields = outputs["instances"]._fields
                pred_classes = fields["pred_classes"]
                selection_mask = pred_classes == 0
                # acquire person boxes
                pred_classes = pred_classes[selection_mask]
                pred_boxes = fields["pred_boxes"].tensor[selection_mask]
                scores = fields["scores"][selection_mask]
                boxes = cv2_transform.scale_boxes(
                    cfg.DATA.TEST_CROP_SIZE, pred_boxes,
                    frame_provider.display_height,
                    frame_provider.display_width)
                boxes = torch.cat(
                    [torch.full((boxes.shape[0], 1), float(0)).cuda(), boxes],
                    axis=1)

            inputs = torch.as_tensor(frames).float()
            inputs = inputs / 255.0
            # Perform color normalization.
            inputs = inputs - torch.tensor(cfg.DATA.MEAN)
            inputs = inputs / torch.tensor(cfg.DATA.STD)
            # T H W C -> C T H W.
            inputs = inputs.permute(3, 0, 1, 2)

            # 1 C T H W.
            inputs = inputs.unsqueeze(0)

            # Sample frames for the fast pathway.
            index = torch.linspace(0, inputs.shape[2] - 1,
                                   cfg.DATA.NUM_FRAMES).long()
            fast_pathway = torch.index_select(inputs, 2, index)
            # logger.info('fast_pathway.shape={}'.format(fast_pathway.shape))

            # Sample frames for the slow pathway.
            index = torch.linspace(0, fast_pathway.shape[2] - 1,
                                   fast_pathway.shape[2] //
                                   cfg.SLOWFAST.ALPHA).long()
            slow_pathway = torch.index_select(fast_pathway, 2, index)
            # logger.info('slow_pathway.shape={}'.format(slow_pathway.shape))
            inputs = [slow_pathway, fast_pathway]
            """
            # Transfer the data to the current GPU device.
            if isinstance(inputs, (list,)):
                for i in range(len(inputs)):
                    inputs[i] = inputs[i].cuda(non_blocking=True)
            else:
                inputs = inputs.cuda(non_blocking=True)
            """
            # Perform the forward pass.
            if cfg.DETECTION.ENABLE:
                # When there is nothing in the scene,
                #   use a dummy variable to disable all computations below.
                if not len(boxes):
                    preds = torch.tensor([])
                else:
                    preds = model(inputs, boxes)
            else:
                preds = model(inputs)

            # Gather all the predictions across all the devices to perform ensemble.
            if cfg.NUM_GPUS > 1:
                preds = du.all_gather(preds)[0]

            if cfg.DETECTION.ENABLE:
                # This post-processing is intentionally done on the CPU because my laptop GPU
                #   (RTX 2080) runs out of memory; if your GPU is more powerful, I'd recommend
                #   changing this section so that CUDA does the processing.
                preds = preds.cpu().detach().numpy()
                pred_masks = preds > .1
                label_ids = [
                    np.nonzero(pred_mask)[0] for pred_mask in pred_masks
                ]
                pred_labels = [[
                    labels[label_id] for label_id in perbox_label_ids
                ] for perbox_label_ids in label_ids]
                # I'm unsure how detectron2 rescales boxes to the original image size, so I
                #   rescale the SlowFast input boxes back instead; it's safer, and it still works
                #   even if the boxes were not rescaled by cv2_transform.rescale_boxes.
                boxes = boxes.cpu().detach().numpy()
                ratio = np.min([
                    frame_provider.display_height, frame_provider.display_width
                ]) / cfg.DATA.TEST_CROP_SIZE
                boxes = boxes[:, 1:] * ratio
            else:
                ## Option 1: single label inference selected from the highest probability entry.
                # label_id = preds.argmax(-1).cpu()
                # pred_label = labels[label_id]
                # Option 2: multi-label inference selected from probability entries > threshold
                label_ids = torch.nonzero(
                    preds.squeeze() > .1).reshape(-1).cpu().detach().numpy()
                pred_labels = labels[label_ids]
                logger.info(pred_labels)
                if not list(pred_labels):
                    pred_labels = ['Unknown']

            # # option 1: remove the oldest frame in the buffer to make place for the new one.
            # frames.pop(0)
            # option 2: empty the buffer
            frames = []
            s = time() - start

        if cfg.DETECTION.ENABLE and pred_labels and boxes.any():
            for box, box_labels in zip(boxes.astype(int), pred_labels):
                cv2.rectangle(frame,
                              tuple(box[:2]),
                              tuple(box[2:]), (0, 255, 0),
                              thickness=2)
                label_origin = box[:2]
                for label in box_labels:
                    label_origin[-1] -= 5
                    (label_width, label_height), _ = cv2.getTextSize(
                        label, cv2.FONT_HERSHEY_SIMPLEX, .5, 2)
                    cv2.rectangle(frame,
                                  (label_origin[0], label_origin[1] + 5),
                                  (label_origin[0] + label_width,
                                   label_origin[1] - label_height - 5),
                                  palette[labels.index(label)], -1)
                    cv2.putText(frame, label, tuple(label_origin),
                                cv2.FONT_HERSHEY_SIMPLEX, .5, (255, 255, 255),
                                1)
                    label_origin[-1] -= label_height + 5
        if not cfg.DETECTION.ENABLE:
            # Display predicted labels to frame.
            y_offset = 50
            cv2.putText(frame,
                        'Action:', (10, y_offset),
                        fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                        fontScale=.65,
                        color=(0, 235, 0),
                        thickness=2)
            for pred_label in pred_labels:
                y_offset += 30
                cv2.putText(frame,
                            '{}'.format(pred_label), (20, y_offset),
                            fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                            fontScale=.65,
                            color=(0, 235, 0),
                            thickness=2)

        # Display prediction speed
        cv2.putText(frame,
                    'Speed: {:.2f}s'.format(s), (10, 25),
                    fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                    fontScale=.65,
                    color=(0, 235, 0),
                    thickness=2)
        # Display the frame
        cv2.imshow('SlowFast', frame)
        # hit Esc to quit the demo.
        key = cv2.waitKey(1)
        if key == 27:
            break

    frame_provider.clean()
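
A standalone illustration of the slow/fast pathway sampling performed above, run on a dummy clip; alpha is the SlowFast frame-rate ratio between the two pathways.

import torch

num_frames, alpha = 32, 4
clip = torch.rand(1, 3, num_frames, 224, 224)                     # 1 C T H W
fast_pathway = clip                                               # keep all T frames
index = torch.linspace(0, num_frames - 1, num_frames // alpha).long()
slow_pathway = torch.index_select(clip, 2, index)                 # every alpha-th frame
print(slow_pathway.shape, fast_pathway.shape)                     # (1, 3, 8, 224, 224) (1, 3, 32, 224, 224)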
Code example #28
File: env.py Project: vinhnguyen21/SlowFast
def setup_environment():
    # Setup logging format.
    logging.setup_logging()
Code example #29
File: train_net.py Project: bqhuyy/SlowFast
def train(cfg):
    """
    Train a video model for many epochs on train set and evaluate it on val set.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Set up environment.
    du.init_distributed_training(cfg)
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging(cfg.OUTPUT_DIR)

    # Init multigrid.
    multigrid = None
    if cfg.MULTIGRID.LONG_CYCLE or cfg.MULTIGRID.SHORT_CYCLE:
        multigrid = MultigridSchedule()
        cfg = multigrid.init_multigrid(cfg)
        if cfg.MULTIGRID.LONG_CYCLE:
            cfg, _ = multigrid.update_long_cycle(cfg, cur_epoch=0)
    # Print config.
    logger.info("Train with config:")
    logger.info(pprint.pformat(cfg))

    # Build the video model and print model statistics.
    model = build_model(cfg)
    if du.is_master_proc() and cfg.LOG_MODEL_INFO:
        misc.log_model_info(model, cfg, use_train_input=True)

    # Construct the optimizer.
    optimizer = optim.construct_optimizer(model, cfg)

    # Load a checkpoint to resume training if applicable.
    start_epoch = cu.load_train_checkpoint(cfg, model, optimizer)

    # Create the video train and val loaders.
    train_loader = loader.construct_loader(cfg, "train")
    val_loader = loader.construct_loader(cfg, "val")
    precise_bn_loader = (loader.construct_loader(
        cfg, "train", is_precise_bn=True)
                         if cfg.BN.USE_PRECISE_STATS else None)

    # Create meters.
    if cfg.DETECTION.ENABLE:
        train_meter = AVAMeter(len(train_loader), cfg, mode="train")
        val_meter = AVAMeter(len(val_loader), cfg, mode="val")
    else:
        train_meter = TrainMeter(len(train_loader), cfg)
        val_meter = ValMeter(len(val_loader), cfg)

    # set up writer for logging to Tensorboard format.
    if cfg.TENSORBOARD.ENABLE and du.is_master_proc(
            cfg.NUM_GPUS * cfg.NUM_SHARDS):
        writer = tb.TensorboardWriter(cfg)
    else:
        writer = None

    # Perform the training loop.
    logger.info("Start epoch: {}".format(start_epoch + 1))

    for cur_epoch in range(start_epoch, cfg.SOLVER.MAX_EPOCH):
        if cfg.MULTIGRID.LONG_CYCLE:
            cfg, changed = multigrid.update_long_cycle(cfg, cur_epoch)
            if changed:
                (
                    model,
                    optimizer,
                    train_loader,
                    val_loader,
                    precise_bn_loader,
                    train_meter,
                    val_meter,
                ) = build_trainer(cfg)

                # Load checkpoint.
                if cu.has_checkpoint(cfg.OUTPUT_DIR):
                    last_checkpoint = cu.get_last_checkpoint(cfg.OUTPUT_DIR)
                    assert "{:05d}.pyth".format(cur_epoch) in last_checkpoint
                else:
                    last_checkpoint = cfg.TRAIN.CHECKPOINT_FILE_PATH
                logger.info("Load from {}".format(last_checkpoint))
                cu.load_checkpoint(last_checkpoint, model, cfg.NUM_GPUS > 1,
                                   optimizer)

        # Shuffle the dataset.
        loader.shuffle_dataset(train_loader, cur_epoch)

        # Train for one epoch.
        train_epoch(train_loader, model, optimizer, train_meter, cur_epoch,
                    cfg, writer)

        is_checkp_epoch = (cu.is_checkpoint_epoch(
            cfg,
            cur_epoch,
            None if multigrid is None else multigrid.schedule,
        ))
        is_eval_epoch = misc.is_eval_epoch(
            cfg, cur_epoch, None if multigrid is None else multigrid.schedule)

        # Compute precise BN stats.
        if ((is_checkp_epoch or is_eval_epoch) and cfg.BN.USE_PRECISE_STATS
                and len(get_bn_modules(model)) > 0):
            calculate_and_update_precise_bn(
                precise_bn_loader,
                model,
                min(cfg.BN.NUM_BATCHES_PRECISE, len(precise_bn_loader)),
                cfg.NUM_GPUS > 0,
            )
        _ = misc.aggregate_sub_bn_stats(model)

        # Save a checkpoint.
        if is_checkp_epoch:
            cu.save_checkpoint(cfg.OUTPUT_DIR, model, optimizer, cur_epoch,
                               cfg)
        # Evaluate the model on validation set.
        if is_eval_epoch:
            eval_epoch(val_loader, model, val_meter, cur_epoch, cfg, writer)

    if writer is not None:
        writer.close()
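
A simplified sketch of the "precise BN" step above: re-estimate BatchNorm running statistics with gradients disabled, using a cumulative average over a fixed number of batches. This is a conceptual illustration, not the fvcore implementation behind calculate_and_update_precise_bn.

import torch

@torch.no_grad()
def recompute_bn_stats(model, loader, num_batches):
    for m in model.modules():
        if isinstance(m, torch.nn.modules.batchnorm._BatchNorm):
            m.reset_running_stats()
            m.momentum = None   # None => cumulative moving average over all seen batches
    model.train()               # BN layers update running stats only in train mode
    for i, batch in enumerate(loader):
        if i >= num_batches:
            break
        inputs = batch[0]       # assume the loader yields (inputs, ...) tuples
        model(inputs)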
Code example #30
File: train_net_des.py Project: gabrielsluz/SlowFast
def train_des(cfg):
    """
    Train a video model for many epochs on train set and evaluate it on val set.
    Args:
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Set random seed from configs.
    np.random.seed(cfg.RNG_SEED)
    torch.manual_seed(cfg.RNG_SEED)

    # Setup logging format.
    logging.setup_logging(cfg.OUTPUT_DIR)
    # Print config.
    logger.info("Train with config:")
    logger.info(pprint.pformat(cfg))

    # Build the video model and print model statistics.
    model = build_clevrer_model(cfg)

    # Construct the optimizer.
    optimizer = optim.construct_optimizer(model, cfg)

    # Load a checkpoint to resume training if applicable.
    start_epoch = cu.load_train_checkpoint(cfg, model, optimizer)
    # Create the video train and val loaders.
    if cfg.TRAIN.DATASET != 'Clevrer_des':
        print("This train script does not support your dataset: -{}-. Only Clevrer_des".format(cfg.TRAIN.DATASET))
        exit()
    # Create the video train and val loaders.
    train_loader = build_dataloader(cfg, "train")
    val_loader = build_dataloader(cfg, "val")

    # Create meters.
    train_meter = ClevrerTrainMeter(len(train_loader), cfg)
    val_meter = ClevrerValMeter(len(val_loader), cfg)

    # Perform the training loop.
    logger.info("Start epoch: {}".format(start_epoch + 1))
    for cur_epoch in range(start_epoch, cfg.SOLVER.MAX_EPOCH):
        # Shuffle the dataset.
        #loader.shuffle_dataset(train_loader, cur_epoch)
        # Train for one epoch.
        train_epoch(
            train_loader, model, optimizer, train_meter, cur_epoch, cfg
        )

        is_checkp_epoch = cu.is_checkpoint_epoch(
            cfg,
            cur_epoch,
            None,
        )
        is_eval_epoch = misc.is_eval_epoch(
            cfg, cur_epoch, None
        )

        # Save a checkpoint.
        # if is_checkp_epoch:
        #     cu.save_checkpoint(cfg.OUTPUT_DIR, model, optimizer, cur_epoch, cfg)
        # Evaluate the model on validation set.
        if is_eval_epoch:
            eval_epoch(val_loader, model, val_meter, cur_epoch, cfg)