예제 #1
0
        "filter_agents_threshold": 0.5,
    },
    "train_data_loader": {
        "key": "scenes/train.zarr",
        "batch_size": 32,
        "shuffle": True,
        "num_workers": 4,
    },
}

future_n_frames = cfg["model_params"]["future_num_frames"]
n_trajectories = 3
model = ModelWithConfidence(
    backbone=resnet34(
        pretrained=True,
        in_channels=6,
        num_classes=2 * future_n_frames * n_trajectories + n_trajectories,
    ),
    future_num_frames=future_n_frames,
    num_trajectories=n_trajectories,
)

load_checkpoint(checkpoint_path, model)
model = model.eval()

device = torch.device("cuda:0")
model = model.to(device)

valid_mask = np.load(
    f"{DATA_DIR}/scenes/validate_chopped_100/mask.npz")["arr_0"]
예제 #2
0
def experiment(logdir, device) -> None:
    """Experiment function

    Args:
        logdir (Path): directory where should be placed logs
        device (str): device name to use
    """
    tb_dir = logdir / "tensorboard"
    main_metric = "loss"
    minimize_metric = True

    seed_all()

    history_n_frames = cfg["model_params"]["history_num_frames"]
    future_n_frames = cfg["model_params"]["future_num_frames"]
    n_trajectories = 3
    model = ModelWithConfidence(
        backbone=resnet34(
            pretrained=True,
            in_channels=6,
            num_classes=2 * future_n_frames * n_trajectories + n_trajectories,
        ),
        future_num_frames=future_n_frames,
        num_trajectories=n_trajectories,
    )
    # model = nn.DataParallel(model)
    model = model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    criterion = neg_multi_log_likelihood_batch
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=100)

    with TensorboardLogger(tb_dir) as tb:
        stage = "stage_0"
        n_epochs = 3
        print(f"Stage - {stage}")

        checkpointer = CheckpointManager(
            logdir=logdir / stage,
            metric=main_metric,
            metric_minimization=minimize_metric,
            save_n_best=5,
        )

        train_loader, valid_loader = get_loaders(train_batch_size=128,
                                                 valid_batch_size=128)

        for epoch in range(1, n_epochs + 1):
            epoch_start_time = time.strftime("%Y-%m-%d %H:%M:%S",
                                             time.localtime())
            print(f"[{epoch_start_time}]\n[Epoch {epoch}/{n_epochs}]")

            train_metrics = train_fn(model,
                                     train_loader,
                                     device,
                                     criterion,
                                     optimizer,
                                     tensorboard_logger=tb,
                                     logdir=logdir / f'{epoch}_epoch')
            log_metrics(stage, train_metrics, tb, "train", epoch)

            valid_metrics = train_metrics
            # valid_metrics = valid_fn(model, valid_loader, device, criterion)
            # log_metrics(stage, valid_metrics, tb, "valid", epoch)

            checkpointer.process(
                metric_value=valid_metrics[main_metric],
                epoch=epoch,
                checkpoint=make_checkpoint(
                    stage,
                    epoch,
                    model,
                    optimizer,
                    scheduler,
                    metrics={
                        "train": train_metrics,
                        "valid": valid_metrics
                    },
                ),
            )

            scheduler.step()