コード例 #1
0
def experiment(logdir, device) -> None:
    """Run the single-stage ResNet-18 training experiment.

    Seeds the run, builds ``ModelWithConfidence`` on a ``resnet18`` backbone
    sized from ``cfg["model_params"]``, and then, for every epoch, trains,
    validates, logs both metric sets to TensorBoard, offers a checkpoint to
    the ``CheckpointManager`` and steps the cosine LR scheduler.

    Args:
        logdir (Path): directory that receives the TensorBoard logs and the
            per-stage checkpoints
        device (str): device name the model is moved to
    """
    tensorboard_path = logdir / "tensorboard"
    tracked_metric, lower_is_better = "loss", True

    seed_all()

    model_cfg = cfg["model_params"]
    n_history = model_cfg["history_num_frames"]
    n_future = model_cfg["future_num_frames"]
    n_modes = 3

    # Sizes are spelled out separately so the backbone call stays readable.
    input_channels = 3 + 2 * (n_history + 1)
    output_size = 2 * n_future * n_modes + n_modes
    backbone = resnet18(
        pretrained=True,
        in_channels=input_channels,
        num_classes=output_size,
    )
    net = ModelWithConfidence(
        backbone=backbone,
        future_num_frames=n_future,
        num_trajectories=n_modes,
    ).to(device)

    optimizer = optim.Adam(net.parameters(), lr=1e-3)
    loss_fn = neg_multi_log_likelihood_batch
    lr_schedule = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=100)

    with TensorboardLogger(tensorboard_path) as tb:
        stage = "stage_0"
        n_epochs = 1
        print(f"Stage - {stage}")

        checkpointer = CheckpointManager(
            logdir=logdir / stage,
            metric=tracked_metric,
            metric_minimization=lower_is_better,
            save_n_best=5,
        )

        loaders = get_loaders(train_batch_size=32, valid_batch_size=32)
        train_loader, valid_loader = loaders

        for epoch in range(1, n_epochs + 1):
            # strftime formats the current local time when no tuple is given.
            epoch_start_time = time.strftime("%Y-%m-%d %H:%M:%S")
            print(f"[{epoch_start_time}]\n[Epoch {epoch}/{n_epochs}]")

            train_metrics = train_fn(
                net, train_loader, device, loss_fn, optimizer
            )
            log_metrics(stage, train_metrics, tb, "train", epoch)

            valid_metrics = valid_fn(net, valid_loader, device, loss_fn)
            log_metrics(stage, valid_metrics, tb, "valid", epoch)

            # Read the tracked value before building the checkpoint, as the
            # original call evaluated its arguments in this order.
            current_value = valid_metrics[tracked_metric]
            epoch_metrics = {"train": train_metrics, "valid": valid_metrics}
            snapshot = make_checkpoint(
                stage,
                epoch,
                net,
                optimizer,
                lr_schedule,
                metrics=epoch_metrics,
            )
            checkpointer.process(
                metric_value=current_value,
                epoch=epoch,
                checkpoint=snapshot,
            )

            lr_schedule.step()
コード例 #2
0
def experiment(logdir, device) -> None:
    """Continue training the ``resnet34_accel`` confidence model.

    Builds ``ModelWithConfidence`` on a ``resnet34_accel`` backbone, loads a
    checkpoint from a hard-coded path into it, and then, for every epoch,
    runs ``train_fn`` (which also receives ``valid_fn`` as ``validation_fn``),
    runs a final validation pass, logs both metric sets and offers a
    checkpoint keyed on the validation ``"score"``.

    Args:
        logdir (Path): directory that receives the TensorBoard logs, the
            per-epoch training output and the per-stage checkpoints
        device (str): device name the model is moved to
    """
    tb_dir = logdir / "tensorboard"
    # The value handed to the checkpoint manager below is the validation
    # "score", so the manager is labelled with that same key.
    main_metric = "score"
    minimize_metric = True

    seed_all()

    history_n_frames = cfg["model_params"]["history_num_frames"]
    future_n_frames = cfg["model_params"]["future_num_frames"]
    n_trajectories = 3
    model = ModelWithConfidence(
        backbone=resnet34_accel(
            pretrained=True,
            in_channels=3 + 3,
            num_classes=2 * future_n_frames * n_trajectories + n_trajectories,
            in_accel_features=(history_n_frames - 1) * 2,
            num_accel_features=32,
        ),
        future_num_frames=future_n_frames,
        num_trajectories=n_trajectories,
    )

    # The checkpoint is loaded before the model is moved to the device.
    load_checkpoint(
        "./logs/resnet34_frast_fulldata_confidence_25hist_accel/epoch_1/train_689999.pth",
        model,
    )

    model = model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    criterion = neg_multi_log_likelihood_batch
    # NOTE(review): this scheduler is neither stepped in this function nor
    # passed to train_fn; it is only stored in the checkpoint. Confirm whether
    # a constant learning rate is intended.
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=100)

    with TensorboardLogger(tb_dir) as tb:
        stage = "stage_0"
        n_epochs = 1
        print(f"Stage - {stage}")

        checkpointer = CheckpointManager(
            logdir=logdir / stage,
            metric=main_metric,
            metric_minimization=minimize_metric,
            save_n_best=5,
        )

        train_loader, (valid_loader, valid_gt_path) = get_loaders(
            train_batch_size=32, valid_batch_size=32
        )

        # Pre-bound validation callback handed to train_fn.
        valid_func = partial(
            valid_fn,
            loader=valid_loader,
            ground_truth_file=valid_gt_path,
            logdir=logdir,
            verbose=True,
        )

        for epoch in range(1, n_epochs + 1):
            epoch_start_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
            print(f"[{epoch_start_time}]\n[Epoch {epoch}/{n_epochs}]")

            train_metrics = train_fn(
                model,
                train_loader,
                device,
                criterion,
                optimizer,
                tensorboard_logger=tb,
                logdir=logdir / f"epoch_{epoch}",
                validation_fn=valid_func,
            )
            log_metrics(stage, train_metrics, tb, "train", epoch)

            valid_metrics = valid_fn(model, valid_loader, device, valid_gt_path, logdir)
            log_metrics(stage, valid_metrics, tb, "valid", epoch)

            checkpointer.process(
                metric_value=valid_metrics[main_metric],
                epoch=epoch,
                checkpoint=make_checkpoint(
                    stage,
                    epoch,
                    model,
                    optimizer,
                    scheduler,
                    metrics={"train": train_metrics, "valid": valid_metrics},
                ),
            )
コード例 #3
0
    },
    "train_data_loader": {
        "key": "scenes/train.zarr",
        "batch_size": 32,
        "shuffle": True,
        "num_workers": 4,
    },
}

# Inference setup: build the ResNet-34 confidence model, load a checkpoint
# into it, move it to the GPU and load the validation mask.
future_n_frames = cfg["model_params"]["future_num_frames"]
n_trajectories = 3
# num_classes is 2 * future_n_frames values per trajectory plus one extra
# value per trajectory (presumably x/y targets and a confidence each --
# confirm against ModelWithConfidence).
model = ModelWithConfidence(
    backbone=resnet34(
        pretrained=True,
        in_channels=6,
        num_classes=2 * future_n_frames * n_trajectories + n_trajectories,
    ),
    future_num_frames=future_n_frames,
    num_trajectories=n_trajectories,
)

# checkpoint_path is defined outside this fragment; the model is switched to
# evaluation mode after the checkpoint has been loaded.
load_checkpoint(checkpoint_path, model)
model = model.eval()

# NOTE(review): the device is hard-coded to the first CUDA device.
device = torch.device("cuda:0")
model = model.to(device)

# Read the "arr_0" entry of the mask archive under validate_chopped_100.
valid_mask = np.load(
    f"{DATA_DIR}/scenes/validate_chopped_100/mask.npz")["arr_0"]

# Data manager rooted at DATA_DIR.
dm = LocalDataManager(DATA_DIR)
コード例 #4
0
        "key": "scenes/train.zarr",
        "batch_size": 32,
        "shuffle": True,
        "num_workers": 4,
    },
}

# Inference setup: build the resnet34_accel confidence model, load a
# checkpoint into it, move it to the GPU and load the validation mask.
history_n_frames = cfg["model_params"]["history_num_frames"]
future_n_frames = cfg["model_params"]["future_num_frames"]
n_trajectories = 3
# num_classes is 2 * future_n_frames values per trajectory plus one extra
# value per trajectory (presumably x/y targets and a confidence each --
# confirm against ModelWithConfidence). in_accel_features is two values per
# history frame except one (assumed to be per-step 2-D features -- TODO
# confirm against resnet34_accel).
model = ModelWithConfidence(
    backbone=resnet34_accel(
        pretrained=True,
        in_channels=6,
        num_classes=2 * future_n_frames * n_trajectories + n_trajectories,
        in_accel_features=(history_n_frames - 1) * 2,
        num_accel_features=32,
    ),
    future_num_frames=future_n_frames,
    num_trajectories=n_trajectories,
)

# checkpoint_path is defined outside this fragment; the model is switched to
# evaluation mode after the checkpoint has been loaded.
load_checkpoint(checkpoint_path, model)
model = model.eval()

# NOTE(review): the device is hard-coded to the first CUDA device.
device = torch.device("cuda:0")
model = model.to(device)

# Read the "arr_0" entry of the mask archive under validate_chopped_100.
valid_mask = np.load(
    f"{DATA_DIR}/scenes/validate_chopped_100/mask.npz")["arr_0"]
コード例 #5
0
def experiment(logdir, device) -> None:
    """Continue training the ResNet-18 confidence model with SGD + CyclicLR.

    Builds ``ModelWithConfidence`` on a ``resnet18`` backbone, creates an SGD
    optimizer with a ``CyclicLR`` scheduler, loads a checkpoint from a
    hard-coded path into the model, and then, for every epoch, runs
    ``train_fn`` (best effort), runs a validation pass, logs the metrics and
    offers a checkpoint keyed on the validation ``"score"``.

    A failure or a Ctrl-C during the training phase does not abort the run:
    the traceback is printed, a placeholder is stored as the train metrics,
    and validation and checkpointing still happen.

    Args:
        logdir (Path): directory that receives the TensorBoard logs, the
            per-epoch training output and the per-stage checkpoints
        device (str): device name the model is moved to
    """
    # Imported locally so the block does not depend on a file-level import.
    import traceback

    tb_dir = logdir / "tensorboard"
    main_metric = "score"
    minimize_metric = True

    seed_all()

    history_n_frames = cfg["model_params"]["history_num_frames"]
    future_n_frames = cfg["model_params"]["future_num_frames"]
    n_trajectories = 3
    model = ModelWithConfidence(
        backbone=resnet18(
            pretrained=True,
            in_channels=3 + 2 * (history_n_frames + 1),
            num_classes=2 * future_n_frames * n_trajectories + n_trajectories,
        ),
        future_num_frames=future_n_frames,
        num_trajectories=n_trajectories,
    )
    model = model.to(device)
    optimizer = optim.SGD(model.parameters(), lr=1e-4)
    # The scheduler is handed to train_fn below rather than stepped here
    # (presumably train_fn steps it per batch -- confirm).
    scheduler = optim.lr_scheduler.CyclicLR(
        optimizer,
        base_lr=1e-4,
        max_lr=1e-3,
        step_size_up=120_000,
        cycle_momentum=True,
        mode="triangular2",
    )

    # Only the model is passed here; the optimizer is not.
    load_checkpoint(
        "./logs/resnet18_bigerimages_continue4_chopped/epoch_1/train_25868.pth",
        model,
    )

    criterion = neg_multi_log_likelihood_batch

    with TensorboardLogger(tb_dir) as tb:
        stage = "stage_0"
        n_epochs = 1
        print(f"Stage - {stage}")

        checkpointer = CheckpointManager(
            logdir=logdir / stage,
            metric=main_metric,
            metric_minimization=minimize_metric,
            save_n_best=5,
        )

        train_loader, (valid_loader, valid_gt_path) = get_loaders(
            train_batch_size=32, valid_batch_size=32
        )

        # Pre-bound validation callback handed to train_fn.
        valid_func = partial(
            valid_fn,
            loader=valid_loader,
            ground_truth_file=valid_gt_path,
            logdir=logdir,
            verbose=True,
        )

        for epoch in range(1, n_epochs + 1):
            epoch_start_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
            print(f"[{epoch_start_time}]\n[Epoch {epoch}/{n_epochs}]")

            try:
                train_metrics = train_fn(
                    model,
                    train_loader,
                    device,
                    criterion,
                    optimizer,
                    scheduler=scheduler,
                    tensorboard_logger=tb,
                    logdir=logdir / f"epoch_{epoch}",
                    validation_fn=valid_func,
                )
                log_metrics(stage, train_metrics, tb, "train", epoch)
            except (Exception, KeyboardInterrupt):
                # Best effort: an error or a manual interrupt during training
                # must not lose the model, so validation and checkpointing
                # still run. The traceback is printed so the failure is not
                # silent. SystemExit and other interpreter-exit signals are
                # deliberately left to propagate.
                traceback.print_exc()
                train_metrics = {"message": "An exception occured!"}

            valid_metrics = valid_fn(model, valid_loader, device, valid_gt_path, logdir)
            log_metrics(stage, valid_metrics, tb, "valid", epoch)

            checkpointer.process(
                metric_value=valid_metrics[main_metric],
                epoch=epoch,
                checkpoint=make_checkpoint(
                    stage,
                    epoch,
                    model,
                    optimizer,
                    scheduler,
                    metrics={"train": train_metrics, "valid": valid_metrics},
                ),
            )