def experiment(logdir, device) -> None:
    """Train the confidence-aware trajectory model for a single stage.

    Builds a ``ModelWithConfidence`` on top of a pretrained ``resnet18``
    backbone, optimizes it with Adam under a cosine-annealed learning rate
    and, after every epoch, logs train/valid metrics and passes a
    checkpoint to the checkpoint manager.

    Args:
        logdir (Path): directory where logs and checkpoints should be placed
        device (str): device name to use
    """
    tensorboard_dir = logdir / "tensorboard"
    tracked_metric = "loss"
    lower_is_better = True

    seed_all()

    model_params = cfg["model_params"]
    history_n_frames = model_params["history_num_frames"]
    future_n_frames = model_params["future_num_frames"]
    n_trajectories = 3

    # Backbone input/output sizes are derived from the frame counts above.
    backbone_in_channels = 3 + 2 * (history_n_frames + 1)
    backbone_out_features = 2 * future_n_frames * n_trajectories + n_trajectories
    backbone = resnet18(
        pretrained=True,
        in_channels=backbone_in_channels,
        num_classes=backbone_out_features,
    )

    # The nn.DataParallel wrapper is left disabled for this experiment.
    model = ModelWithConfidence(
        backbone=backbone,
        future_num_frames=future_n_frames,
        num_trajectories=n_trajectories,
    ).to(device)

    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    criterion = neg_multi_log_likelihood_batch
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=100)

    with TensorboardLogger(tensorboard_dir) as tb:
        stage = "stage_0"
        n_epochs = 1
        print(f"Stage - {stage}")

        checkpointer = CheckpointManager(
            logdir=logdir / stage,
            metric=tracked_metric,
            metric_minimization=lower_is_better,
            save_n_best=5,
        )

        loaders = get_loaders(train_batch_size=32, valid_batch_size=32)
        train_loader, valid_loader = loaders

        for epoch in range(1, n_epochs + 1):
            epoch_start_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
            print(f"[{epoch_start_time}]\n[Epoch {epoch}/{n_epochs}]")

            train_metrics = train_fn(model, train_loader, device, criterion, optimizer)
            log_metrics(stage, train_metrics, tb, "train", epoch)

            valid_metrics = valid_fn(model, valid_loader, device, criterion)
            log_metrics(stage, valid_metrics, tb, "valid", epoch)

            # Read the tracked value before building the checkpoint so a
            # missing key surfaces before any checkpoint work is done.
            tracked_value = valid_metrics[tracked_metric]
            epoch_metrics = {"train": train_metrics, "valid": valid_metrics}
            checkpoint = make_checkpoint(
                stage,
                epoch,
                model,
                optimizer,
                scheduler,
                metrics=epoch_metrics,
            )
            checkpointer.process(
                metric_value=tracked_value,
                epoch=epoch,
                checkpoint=checkpoint,
            )

            # Advance the learning-rate schedule once per epoch.
            scheduler.step()
def experiment(logdir: Path, device: torch.device) -> None:
    """Continue training the EfficientNet-b0 / CosFace model for one stage.

    The model is restored from ``./logs/full_set5/stage_0/best.pth``,
    wrapped in ``nn.DataParallel`` and trained with SGD against a
    class-weighted cross-entropy loss. Checkpoints are ranked by the
    validation ``"gap"`` metric, where higher is better.

    Args:
        logdir (Path): directory where logs and checkpoints should be placed
        device (torch.device): device to run the model and loss on
    """
    tensorboard_dir = logdir / "tensorboard"
    tracked_metric = "gap"
    lower_is_better = False

    seed_all()

    # Encoder is built before the head, mirroring the original call order.
    encoder = EfficientNetEncoder("efficientnet-b0", EMBEDDING_SIZE, bias=False)
    head = CosFace(EMBEDDING_SIZE, NUM_CLASSESS, None)
    model = EncoderWithHead(encoder, head)
    load_checkpoint("./logs/full_set5/stage_0/best.pth", model)

    # Overriding the head's s/m values and freezing the encoder blocks were
    # tried here previously and are left disabled.
    model = nn.DataParallel(model).to(device)

    optimizer = optim.SGD(
        model.parameters(),
        lr=1e-3,
        momentum=0.9,
        weight_decay=1e-5,
    )
    class_weights = get_class_weights()
    criterion = nn.CrossEntropyLoss(weight=class_weights).to(device)
    scheduler = None  # no LR schedule; still passed to make_checkpoint below

    with TensorboardLogger(tensorboard_dir) as tb:
        stage = "stage_0"
        n_epochs = 5
        print(f"Stage - '{stage}'")

        checkpointer = CheckpointManager(
            logdir=logdir / stage,
            metric=tracked_metric,
            metric_minimization=lower_is_better,
            save_n_best=5,
        )

        loaders = get_loaders(stage, train_bs=32, valid_bs=64)
        train_loader, valid_loader = loaders

        for epoch in range(1, n_epochs + 1):
            epoch_start_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
            print(f"[{epoch_start_time}]\n[Epoch {epoch}/{n_epochs}]")

            train_metrics = train_fn(model, train_loader, device, criterion, optimizer)
            log_metrics(stage, train_metrics, tb, "train", epoch)

            valid_metrics = valid_fn(model, valid_loader, device, criterion)
            log_metrics(stage, valid_metrics, tb, "valid", epoch)

            # Read the tracked value first, then assemble the checkpoint.
            tracked_value = valid_metrics[tracked_metric]
            epoch_metrics = {"train": train_metrics, "valid": valid_metrics}
            checkpoint = make_checkpoint(
                stage,
                epoch,
                model,
                optimizer,
                scheduler,
                metrics=epoch_metrics,
            )
            checkpointer.process(
                metric_value=tracked_value,
                epoch=epoch,
                checkpoint=checkpoint,
            )
def experiment(logdir: Path, device: torch.device) -> None:
    """Train the EfficientNet-b5 / CosFace model from scratch for one stage.

    The model is wrapped in ``nn.DataParallel`` and trained with SGD against
    an unweighted cross-entropy loss. Checkpoints are ranked by the
    validation ``"gap"`` metric, where higher is better.

    Args:
        logdir (Path): directory where logs and checkpoints should be placed
        device (torch.device): device to run the model on
    """
    tensorboard_dir = logdir / "tensorboard"
    tracked_metric = "gap"
    lower_is_better = False

    seed_all()

    # Encoder is built before the head, mirroring the original call order.
    encoder = EfficientNetEncoder("efficientnet-b5", EMBEDDING_SIZE, bias=True)
    # CosFace `s` is derived from the number of classes; `m` is fixed.
    cosface_s = np.sqrt(2) * np.log(NUM_CLASSESS - 1)
    head = CosFace(
        EMBEDDING_SIZE,
        NUM_CLASSESS,
        device_id=None,
        s=cosface_s,
        m=0.1,
    )
    model = nn.DataParallel(EncoderWithHead(encoder, head)).to(device)

    optimizer = optim.SGD(
        model.parameters(),
        lr=1e-3,
        momentum=0.9,
        weight_decay=1e-5,
    )
    criterion = nn.CrossEntropyLoss()
    scheduler = None  # no LR schedule; still passed to make_checkpoint below

    with TensorboardLogger(tensorboard_dir) as tb:
        stage = "stage_0"
        n_epochs = 10
        print(f"Stage - '{stage}'")

        checkpointer = CheckpointManager(
            logdir=logdir / stage,
            metric=tracked_metric,
            metric_minimization=lower_is_better,
            save_n_best=5,
        )

        loaders = get_loaders(stage, train_bs=48, valid_bs=128)
        train_loader, valid_loader = loaders

        for epoch in range(1, n_epochs + 1):
            epoch_start_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
            print(f"[{epoch_start_time}]\n[Epoch {epoch}/{n_epochs}]")

            train_metrics = train_fn(model, train_loader, device, criterion, optimizer)
            log_metrics(stage, train_metrics, tb, "train", epoch)

            valid_metrics = valid_fn(model, valid_loader, device, criterion)
            log_metrics(stage, valid_metrics, tb, "valid", epoch)

            # Read the tracked value first, then assemble the checkpoint.
            tracked_value = valid_metrics[tracked_metric]
            epoch_metrics = {"train": train_metrics, "valid": valid_metrics}
            checkpoint = make_checkpoint(
                stage,
                epoch,
                model,
                optimizer,
                scheduler,
                metrics=epoch_metrics,
            )
            checkpointer.process(
                metric_value=tracked_value,
                epoch=epoch,
                checkpoint=checkpoint,
            )
def experiment(logdir, device) -> None:
    """Continue training the resnet34_accel confidence model for one stage.

    The model is restored from a previous run's checkpoint, trained with
    Adam, and validated against a ground-truth file. Checkpoints are ranked
    by the validation ``"score"`` value, where lower is better.

    Args:
        logdir (Path): directory where logs and checkpoints should be placed
        device (str): device name to use
    """
    tb_dir = logdir / "tensorboard"
    # The checkpoint manager is fed valid_metrics["score"], so the metric it
    # is configured with must carry the same name. It was previously labelled
    # "loss" while "score" values were passed in.
    main_metric = "score"
    minimize_metric = True

    seed_all()

    history_n_frames = cfg["model_params"]["history_num_frames"]
    future_n_frames = cfg["model_params"]["future_num_frames"]
    n_trajectories = 3

    model = ModelWithConfidence(
        backbone=resnet34_accel(
            pretrained=True,
            in_channels=3 + 3,
            num_classes=2 * future_n_frames * n_trajectories + n_trajectories,
            in_accel_features=(history_n_frames - 1) * 2,
            num_accel_features=32,
        ),
        future_num_frames=future_n_frames,
        num_trajectories=n_trajectories,
    )
    load_checkpoint(
        "./logs/resnet34_frast_fulldata_confidence_25hist_accel/epoch_1/train_689999.pth",
        model,
    )
    model = model.to(device)

    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    criterion = neg_multi_log_likelihood_batch
    # NOTE(review): the scheduler is stored in the checkpoint but is neither
    # stepped in this function nor passed to train_fn - confirm that is intended.
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=100)

    with TensorboardLogger(tb_dir) as tb:
        stage = "stage_0"
        n_epochs = 1
        print(f"Stage - {stage}")

        checkpointer = CheckpointManager(
            logdir=logdir / stage,
            metric=main_metric,
            metric_minimization=minimize_metric,
            save_n_best=5,
        )

        train_loader, (valid_loader, valid_gt_path) = get_loaders(
            train_batch_size=32, valid_batch_size=32
        )

        # Validation callback handed to train_fn; the model and device are
        # left unbound for train_fn to supply.
        valid_func = partial(
            valid_fn,
            loader=valid_loader,
            ground_truth_file=valid_gt_path,
            logdir=logdir,
            verbose=True,
        )

        for epoch in range(1, n_epochs + 1):
            epoch_start_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
            print(f"[{epoch_start_time}]\n[Epoch {epoch}/{n_epochs}]")

            train_metrics = train_fn(
                model,
                train_loader,
                device,
                criterion,
                optimizer,
                tensorboard_logger=tb,
                logdir=logdir / f"epoch_{epoch}",
                validation_fn=valid_func,
            )
            log_metrics(stage, train_metrics, tb, "train", epoch)

            valid_metrics = valid_fn(model, valid_loader, device, valid_gt_path, logdir)
            log_metrics(stage, valid_metrics, tb, "valid", epoch)

            checkpointer.process(
                metric_value=valid_metrics[main_metric],
                epoch=epoch,
                checkpoint=make_checkpoint(
                    stage,
                    epoch,
                    model,
                    optimizer,
                    scheduler,
                    metrics={"train": train_metrics, "valid": valid_metrics},
                ),
            )
def experiment(logdir, device) -> None:
    """Continue training the resnet18 confidence model for one stage.

    The model is restored from a previous run's checkpoint and trained with
    SGD under a cyclic learning-rate schedule. Training is best-effort: if
    it is interrupted or fails, the error is reported and the function still
    runs validation and hands a checkpoint to the checkpoint manager.
    Checkpoints are ranked by the validation ``"score"`` value, where lower
    is better.

    Args:
        logdir (Path): directory where logs and checkpoints should be placed
        device (str): device name to use
    """
    # Local import: only needed to report a failed/interrupted training run.
    import traceback

    tb_dir = logdir / "tensorboard"
    main_metric = "score"
    minimize_metric = True

    seed_all()

    history_n_frames = cfg["model_params"]["history_num_frames"]
    future_n_frames = cfg["model_params"]["future_num_frames"]
    n_trajectories = 3

    model = ModelWithConfidence(
        backbone=resnet18(
            pretrained=True,
            in_channels=3 + 2 * (history_n_frames + 1),
            num_classes=2 * future_n_frames * n_trajectories + n_trajectories,
        ),
        future_num_frames=future_n_frames,
        num_trajectories=n_trajectories,
    )
    model = model.to(device)

    optimizer = optim.SGD(model.parameters(), lr=1e-4)
    scheduler = optim.lr_scheduler.CyclicLR(
        optimizer,
        base_lr=1e-4,
        max_lr=1e-3,
        step_size_up=120_000,
        cycle_momentum=True,
        mode="triangular2",
    )

    # Only the model is passed to load_checkpoint; the optimizer is not.
    load_checkpoint(
        "./logs/resnet18_bigerimages_continue4_chopped/epoch_1/train_25868.pth",
        model,
    )
    criterion = neg_multi_log_likelihood_batch

    with TensorboardLogger(tb_dir) as tb:
        stage = "stage_0"
        n_epochs = 1
        print(f"Stage - {stage}")

        checkpointer = CheckpointManager(
            logdir=logdir / stage,
            metric=main_metric,
            metric_minimization=minimize_metric,
            save_n_best=5,
        )

        train_loader, (valid_loader, valid_gt_path) = get_loaders(
            train_batch_size=32, valid_batch_size=32
        )

        # Validation callback handed to train_fn; the model and device are
        # left unbound for train_fn to supply.
        valid_func = partial(
            valid_fn,
            loader=valid_loader,
            ground_truth_file=valid_gt_path,
            logdir=logdir,
            verbose=True,
        )

        for epoch in range(1, n_epochs + 1):
            epoch_start_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
            print(f"[{epoch_start_time}]\n[Epoch {epoch}/{n_epochs}]")

            try:
                train_metrics = train_fn(
                    model,
                    train_loader,
                    device,
                    criterion,
                    optimizer,
                    scheduler=scheduler,
                    tensorboard_logger=tb,
                    logdir=logdir / f"epoch_{epoch}",
                    validation_fn=valid_func,
                )
                log_metrics(stage, train_metrics, tb, "train", epoch)
            except BaseException:
                # NOTE(review): BaseException (which includes
                # KeyboardInterrupt) is presumably caught on purpose so an
                # interrupted run still gets validated and checkpointed.
                # Keep that behaviour, but report what went wrong instead of
                # silently discarding the error.
                traceback.print_exc()
                train_metrics = {"message": "An exception occured!"}

            valid_metrics = valid_fn(model, valid_loader, device, valid_gt_path, logdir)
            log_metrics(stage, valid_metrics, tb, "valid", epoch)

            checkpointer.process(
                metric_value=valid_metrics[main_metric],
                epoch=epoch,
                checkpoint=make_checkpoint(
                    stage,
                    epoch,
                    model,
                    optimizer,
                    scheduler,
                    metrics={"train": train_metrics, "valid": valid_metrics},
                ),
            )