Example 1
def test_nested_model_sessions():
    model = torch.nn.Sequential(torch.nn.Linear(1, 1), torch.nn.Dropout(0.1))
    print(model.training)
    with TorchEvalSession(model):
        print(model.training)
        with TorchTrainSession(model):
            print(model.training)
            with TorchEvalSession(model):
                print(model.training)
                with TorchTrainSession(model):
                    print(model.training)
                    with TorchEvalSession(model):
                        print(model.training)
    print(model.training)
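The nesting above only behaves as the prints suggest if each session records the module's previous mode and restores it on exit. A minimal sketch of such a context manager (an assumption about how draugr implements TorchTrainSession/TorchEvalSession; the real classes may differ):

class TorchTrainSession:
    """Minimal sketch: switch to train mode on entry, restore the prior mode on exit."""

    def __init__(self, model):
        self.model = model

    def __enter__(self):
        self.previous_mode = self.model.training  # remember the mode we entered with
        self.model.train()
        return self.model

    def __exit__(self, exc_type, exc_value, traceback):
        self.model.train(self.previous_mode)  # restore, whatever happened inside
        return False

With that behaviour the test prints True, False, True, False, True, False, and finally True once every session has unwound.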
Example 2
def train_model(
    model,
    optimiser,
    epoch_i: int,
    metric_writer: Writer,
    loader: DataLoader,
    log_interval=10,
):
    with TorchTrainSession(model):
        train_accum_loss = 0
        generator = tqdm(enumerate(loader))
        for batch_idx, (original, *_) in generator:
            original = original.to(global_torch_device())

            optimiser.zero_grad()
            reconstruction, mean, log_var = model(original)
            loss = loss_function(reconstruction, original, mean, log_var)
            loss.backward()
            optimiser.step()

            train_accum_loss += loss.item()
            metric_writer.scalar("train_loss", loss.item())

            if batch_idx % log_interval == 0:
                generator.set_description(
                    f"Train Epoch: {epoch_i}"
                    f" [{batch_idx * len(original)}/"
                    f"{len(loader.dataset)}"
                    f" ({100. * batch_idx / len(loader):.0f}%)]\t"
                    f"Loss: {loss.item() / len(original):.6f}")
        print(f"====> Epoch: {epoch_i}"
              f" Average loss: {train_accum_loss / len(loader.dataset):.4f}")
Example 3
def single_epoch_fitting(
    model: torch.nn.Module,
    optimiser,
    train_loader_,
    *,
    epoch: int = None,
    writer: Writer = None,
    device_: torch.device = global_torch_device()) -> None:
    accum_loss = 0
    num_batches = len(train_loader_)

    with TorchTrainSession(model):
        for batch_idx, (data, target) in tqdm(enumerate(train_loader_),
                                              desc='train batch #',
                                              total=num_batches):
            loss = nll_loss(
                model(data.to(device_)).squeeze(), target.to(device_)
            )  # negative log-likelihood for a tensor of size (batch x 1 x n_output)
            optimiser.zero_grad()
            loss.backward()
            optimiser.step()
            accum_loss += loss.item()

    if writer:
        writer.scalar('loss', accum_loss / num_batches, epoch)
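nll_loss expects log-probabilities, so the model handed to single_epoch_fitting should end in a LogSoftmax layer (or the loss should be swapped for cross_entropy on raw logits). A hypothetical usage sketch with synthetic data, assuming the draugr helpers used above are importable:

import torch
from torch.utils.data import DataLoader, TensorDataset

inputs = torch.randn(256, 20)
targets = torch.randint(0, 10, (256,))
loader = DataLoader(TensorDataset(inputs, targets), batch_size=32)

classifier = torch.nn.Sequential(torch.nn.Linear(20, 10),
                                 torch.nn.LogSoftmax(dim=-1))
optimiser = torch.optim.Adam(classifier.parameters(), lr=1e-3)

for epoch in range(3):
    single_epoch_fitting(classifier, optimiser, loader, epoch=epoch)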
Example 4
def maskrcnn_train_single_epoch(
    *,
    model: Module,
    optimiser: torch.optim.Optimizer,
    data_loader: DataLoader,
    device: torch.device = global_torch_device(),
    writer: Writer = None,
) -> None:
    """

    :param model:
    :param optimiser:
    :param data_loader:
    :param epoch_i:
    :param log_frequency:
    :param device:
    :param writer:
    :return:
    """
    model.to(device)
    with TorchTrainSession(model):

        for images, targets in tqdm.tqdm(data_loader, desc="Batch #"):
            images = [img.to(device) for img in images]
            targets = [{k: v.to(device)
                        for k, v in t.items()} for t in targets]

            # torch.cuda.synchronize(device)
            loss_dict = model(images, targets=targets)
            losses = sum(loss for loss in loss_dict.values())

            loss_dict_reduced = reduce_dict(
                loss_dict)  # reduce losses over all GPUs for logging purposes
            losses_reduced = sum(loss for loss in loss_dict_reduced.values())
            loss_value = losses_reduced.item()

            if not math.isfinite(loss_value):
                print(f"Loss is {loss_value}, stopping training")
                print(loss_dict_reduced)
                sys.exit(1)

            optimiser.zero_grad()
            losses.backward()
            optimiser.step()

            if writer:
                for k, v in {
                        "loss": losses_reduced,
                        "lr": torch.optim.Optimizer.param_groups[0]["lr"],
                        **loss_dict_reduced,
                }.items():
                    writer.scalar(k, v)
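reduce_dict is not defined in the snippet; in torchvision's detection reference code it averages each loss tensor across distributed processes so that the logged values agree between ranks. A sketch along those lines (an assumption about the helper used here):

import torch
import torch.distributed as dist

def reduce_dict(input_dict: dict, average: bool = True) -> dict:
    """Average dict values across processes; identity in single-process runs."""
    world_size = (dist.get_world_size()
                  if dist.is_available() and dist.is_initialized() else 1)
    if world_size < 2:
        return input_dict
    with torch.no_grad():
        names = sorted(input_dict)  # identical ordering on every rank
        values = torch.stack([input_dict[k] for k in names])
        dist.all_reduce(values)
        if average:
            values /= world_size
        return dict(zip(names, values))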
Example 5
def train_siamese(
    model,
    optimiser,
    criterion,
    *,
    writer: Writer = MockWriter(),
    train_number_epochs,
    data_dir,
    train_batch_size,
    model_name,
    save_path,
    save_best=False,
    img_size,
    validation_interval: int = 1,
):
    """
    :param data_dir:
    :type data_dir:
    :param optimiser:
    :type optimiser:
    :param criterion:
    :type criterion:
    :param writer:
    :type writer:
    :param model_name:
    :type model_name:
    :param save_path:
    :type save_path:
    :param save_best:
    :type save_best:
    :param model:
    :type model:
    :param train_number_epochs:
    :type train_number_epochs:
    :param train_batch_size:
    :type train_batch_size:
    :return:
    :rtype:

      Parameters
      ----------
      img_size
      validation_interval"""

    train_dataloader = DataLoader(
        TripletDataset(
            data_path=data_dir,
            transform=transforms.Compose([
                transforms.Grayscale(),
                transforms.Resize(img_size),
                transforms.ToTensor(),
            ]),
            split=SplitEnum.training,
        ),
        shuffle=True,
        num_workers=0,
        batch_size=train_batch_size,
    )

    valid_dataloader = DataLoader(
        TripletDataset(
            data_path=data_dir,
            transform=transforms.Compose([
                transforms.Grayscale(),
                transforms.Resize(img_size),
                transforms.ToTensor(),
            ]),
            split=SplitEnum.validation,
        ),
        shuffle=True,
        num_workers=0,
        batch_size=train_batch_size,
    )

    best = math.inf

    E = tqdm(range(0, train_number_epochs))
    batch_counter = count()

    for epoch in E:
        for tss in train_dataloader:
            batch_i = next(batch_counter)
            with TorchTrainSession(model):
                optimiser.zero_grad()
                loss_contrastive = criterion(*model(
                    *[t.to(global_torch_device()) for t in tss]))
                loss_contrastive.backward()
                optimiser.step()
                a = loss_contrastive.cpu().item()
                writer.scalar("train_loss", a, batch_i)
            if batch_i % validation_interval == 0:
                with TorchEvalSession(model):
                    for tsv in valid_dataloader:
                        o = model(*[t.to(global_torch_device()) for t in tsv])
                        a_v = criterion(*o).cpu().item()
                        valid_positive_acc = (accuracy(
                            distances=pairwise_distance(o[0], o[1]),
                            is_diff=0).cpu().item())
                        valid_negative_acc = (accuracy(
                            distances=pairwise_distance(o[0], o[2]),
                            is_diff=1).cpu().item())
                        valid_acc = numpy.mean(
                            (valid_negative_acc, valid_positive_acc))
                        writer.scalar("valid_loss", a_v, batch_i)
                        writer.scalar("valid_positive_acc", valid_positive_acc,
                                      batch_i)
                        writer.scalar("valid_negative_acc", valid_negative_acc,
                                      batch_i)
                        writer.scalar("valid_acc", valid_acc, batch_i)
                        if a_v < best:
                            best = a_v
                            print(f"new best {best}")
                            if save_best:
                                save_model_parameters(
                                    model,
                                    optimiser=optimiser,
                                    model_name=model_name,
                                    save_directory=save_path,
                                )
            E.set_description(
                f"Epoch number {epoch}, Current train loss {a}, valid loss {a_v}, valid acc {valid_acc}"
            )

    return model
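accuracy(distances=..., is_diff=...) is also not shown; for triplet embeddings, a plausible version thresholds the pairwise distance and checks it against the expected same/different label. A hypothetical sketch (the threshold value is an assumption):

import torch

def accuracy(*, distances: torch.Tensor, is_diff: float,
             threshold: float = 0.5) -> torch.Tensor:
    """Hypothetical: fraction of pairs whose thresholded distance
    matches the expected label (1 = different, 0 = same)."""
    predictions = (distances > threshold).float()
    return (predictions == is_diff).float().mean()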
Example 6
def pred_target_train_model(
    model,
    train_iterator,
    criterion,
    optimizer,
    scheduler,
    writer,
    interrupted_path,
    test_data_iterator=None,
    num_updates: int = 250000,
    early_stop=None,
) -> torch.nn.Module:
    best_model_wts = copy.deepcopy(model.state_dict())
    best_val_loss = 1e10
    since = time.time()

    try:
        sess = tqdm.tqdm(range(num_updates), leave=False, disable=False)
        val_loss = 0
        update_loss = 0
        val_acc = 0
        last_val = None
        last_out = None
        with torch.autograd.detect_anomaly():
            for update_i in sess:
                for phase in [Split.Training, Split.Validation]:
                    if phase == Split.Training:
                        with TorchTrainSession(model):

                            input, true_label = zip(*next(train_iterator))

                            rgb_imgs = torch_vision_normalize_batch_nchw(
                                uint_nhwc_to_nchw_float_batch(
                                    rgb_drop_alpha_batch_nhwc(
                                        to_tensor(input))))
                            true_label = to_tensor(true_label,
                                                   dtype=torch.long)
                            optimizer.zero_grad()

                            pred = model(rgb_imgs)
                            loss = criterion(pred, true_label)
                            loss.backward()
                            optimizer.step()

                            if last_out is None:
                                last_out = pred
                            else:
                                if not torch.dist(last_out, pred) > 0:
                                    print(f"Same output{last_out},{pred}")
                                last_out = pred

                            update_loss = loss.data.cpu().numpy()
                            writer.scalar("loss/train", update_loss, update_i)

                            if scheduler:
                                scheduler.step()
                    elif test_data_iterator:
                        with TorchEvalSession(model):
                            test_rgb_imgs, test_true_label = zip(
                                *next(test_data_iterator))
                            test_rgb_imgs = torch_vision_normalize_batch_nchw(
                                uint_nhwc_to_nchw_float_batch(
                                    rgb_drop_alpha_batch_nhwc(
                                        to_tensor(test_rgb_imgs))))

                            test_true_label = to_tensor(test_true_label,
                                                        dtype=torch.long)

                            with torch.no_grad():
                                val_pred = model(test_rgb_imgs)
                                val_loss = criterion(val_pred, test_true_label)

                            _, cat = torch.max(val_pred, -1)
                            val_acc = torch.sum(
                                cat == test_true_label) / float(cat.size(0))
                            writer.scalar("loss/acc", val_acc, update_i)
                            writer.scalar("loss/val", val_loss, update_i)

                            if last_val is None:
                                last_val = cat
                            else:
                                if all(last_val == cat):
                                    print(f"Same val{last_val},{cat}")
                                last_val = cat

                            if val_loss < best_val_loss:
                                best_val_loss = val_loss

                                best_model_wts = copy.deepcopy(
                                    model.state_dict())
                                sess.write(
                                    f"New best validation model at update {update_i} with test_loss {best_val_loss}"
                                )
                                torch.save(model.state_dict(),
                                           interrupted_path)

                        if early_stop is not None and val_loss < early_stop:
                            break
                sess.set_description_str(f"Update {update_i} - {phase} "
                                         f"update_loss:{update_loss:.2f} "
                                         f"val_loss:{val_loss} "
                                         f"val_acc:{val_acc}")

    except KeyboardInterrupt:
        print("Interrupt")
    finally:
        pass

    model.load_state_dict(best_model_wts)  # load best model weights

    time_elapsed = time.time() - since
    print(f"{time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s")
    print(f"Best val loss: {best_val_loss:3f}")

    return model
Example 7
def predictor_response_train_model(
    model,
    *,
    train_iterator,
    criterion,
    optimizer,
    scheduler,
    writer,
    interrupted_path,
    val_data_iterator=None,
    num_updates: int = 250000,
    device=global_torch_device(),
    early_stop=None,
):
    """

    :param model:
    :param train_iterator:
    :param criterion:
    :param optimizer:
    :param scheduler:
    :param writer:
    :param interrupted_path:
    :param val_data_iterator:
    :param num_updates:
    :param device:
    :param early_stop:
    :return:
    """
    best_model_wts = copy.deepcopy(model.state_dict())
    best_val_loss = 1e10
    since = time.time()

    try:
        sess = tqdm(range(num_updates), leave=False, disable=False)
        val_loss = 0
        update_loss = 0
        val_acc = 0
        last_val = None
        last_out = None
        with torch.autograd.detect_anomaly():
            for update_i in sess:
                for phase in [Split.Training, Split.Validation]:
                    if phase == Split.Training:
                        with TorchTrainSession(model):

                            input, true_label = next(train_iterator)

                            rgb_imgs = to_tensor(
                                input, dtype=torch.float, device=device
                            ).repeat(1, 3, 1, 1)
                            true_label = to_tensor(
                                true_label, dtype=torch.long, device=device
                            )
                            optimizer.zero_grad()

                            pred = model(rgb_imgs)
                            loss = criterion(pred, true_label)
                            loss.backward()
                            optimizer.step()

                            update_loss = loss.data.cpu().numpy()
                            writer.scalar("loss/train", update_loss, update_i)

                            if scheduler:
                                scheduler.step()
                    elif val_data_iterator:
                        with TorchEvalSession(model):
                            test_rgb_imgs, test_true_label = next(val_data_iterator)

                            test_rgb_imgs = to_tensor(
                                test_rgb_imgs, dtype=torch.float, device=device
                            ).repeat(1, 3, 1, 1)
                            test_true_label = to_tensor(
                                test_true_label, dtype=torch.long, device=device
                            )

                            with torch.no_grad():
                                val_pred = model(test_rgb_imgs)
                                val_loss = criterion(val_pred, test_true_label)

                            _, cat = torch.max(val_pred, -1)
                            val_acc = torch.sum(cat == test_true_label) / float(
                                cat.size(0)
                            )
                            writer.scalar("loss/acc", val_acc, update_i)
                            writer.scalar("loss/val", val_loss, update_i)

                            if val_loss < best_val_loss:
                                best_val_loss = val_loss

                                best_model_wts = copy.deepcopy(model.state_dict())
                                sess.write(
                                    f"New best validation model at update {update_i} with best_val_loss {best_val_loss}"
                                )
                                torch.save(model.state_dict(), interrupted_path)

                        if early_stop is not None and val_loss < early_stop:
                            break
                sess.set_description_str(
                    f"Update {update_i} - {phase} "
                    f"update_loss:{update_loss:.2f} "
                    f"val_loss:{val_loss} "
                    f"val_acc:{val_acc}"
                )

    except KeyboardInterrupt:
        print("Interrupt")
    finally:
        pass

    model.load_state_dict(best_model_wts)  # load best model weights

    time_elapsed = time.time() - since
    print(f"{time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s")
    print(f"Best val loss: {best_val_loss:3f}")

    return model
Example 8
def inner_train_ssd(*,
    data_root: Path,
    cfg: NOD,
    model: Module,
    data_loader: DataLoader,
    optimiser: Optimizer,
    scheduler: WarmupMultiStepLR,
    check_pointer: callable,
    device: torch.device,
    arguments: dict,
    kws: NOD,
) -> Module:
    """

:param data_root:
:type data_root:
:param cfg:
:type cfg:
:param model:
:type model:
:param data_loader:
:type data_loader:
:param optimiser:
:type optimiser:
:param scheduler:
:type scheduler:
:param check_pointer:
:type check_pointer:
:param device:
:type device:
:param arguments:
:type arguments:
:param kws:
:type kws:
:return:
:rtype:
"""
    logger = logging.getLogger("SSD.trainer")
    logger.info("Start training ...")
    meters = MetricLogger()

    with TorchTrainSession(model):
        save_to_disk = global_distribution_rank() == 0
        if kws.use_tensorboard and save_to_disk:
            import tensorboardX

            writer = tensorboardX.SummaryWriter(
                log_dir=str(PROJECT_APP_PATH.user_data / "results" / "tf_logs")
            )
        else:
            writer = None

        max_iter = len(data_loader)
        start_iter = arguments["iteration"]
        start_training_time = time.time()
        end = time.time()
        loss_instance = MultiBoxLoss(neg_pos_ratio=cfg.model.neg_pos_ratio)  # construct the loss once, not per batch
        for iteration, (images, targets, _) in enumerate(data_loader, start_iter):
            arguments["iteration"] = iteration

            images = images.to(device)
            targets = targets.to(device)
            cls_logits, bbox_pred = model(images)

            reg_loss, cls_loss = loss_instance(
                cls_logits, bbox_pred, targets.labels, targets.boxes
            )
            loss_dict = dict(reg_loss=reg_loss, cls_loss=cls_loss)

            loss = sum(loss for loss in loss_dict.values())

            loss_dict_reduced = reduce_loss_dict(
                loss_dict
            )  # reduce losses over all GPUs for logging purposes
            losses_reduced = sum(loss for loss in loss_dict_reduced.values())
            meters.update(total_loss=losses_reduced, **loss_dict_reduced)

            optimiser.zero_grad()
            loss.backward()
            optimiser.step()
            scheduler.step()

            batch_time = time.time() - end
            end = time.time()
            meters.update(time=batch_time)
            if iteration % kws.log_step == 0:
                eta_seconds = meters.time.global_avg * (max_iter - iteration)
                eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))
                logger.info(
                    meters.delimiter.join(
                        [
                            f"iter: {iteration:06d}",
                            f"lr: {optimiser.param_groups[0]['lr']:.5f}",
                            f"{str(meters)}",
                            f"eta: {eta_string}",
                            f"mem: {round(torch.cuda.max_memory_allocated() / 1024.0 / 1024.0)}M",
                        ]
                    )
                )
                if writer:
                    global_step = iteration
                    writer.add_scalar(
                        "losses/total_loss", losses_reduced, global_step=global_step
                    )
                    for loss_name, loss_item in loss_dict_reduced.items():
                        writer.add_scalar(
                            f"losses/{loss_name}", loss_item, global_step=global_step
                        )
                    writer.add_scalar(
                        "lr", optimiser.param_groups[0]["lr"], global_step=global_step
                    )

            if iteration % kws.save_step == 0:
                check_pointer.save(f"model_{iteration:06d}", **arguments)

            if (
                kws.eval_step > 0
                and iteration % kws.eval_step == 0
                and not iteration == max_iter
            ):
                with TorchEvalSession(model):
                    eval_results = do_ssd_evaluation(
                        data_root,
                        cfg,
                        model,
                        distributed=kws.distributed,
                        iteration=iteration,
                    )
                    if global_distribution_rank() == 0 and writer:
                        for eval_result, dataset in zip(
                            eval_results, cfg.datasets.test
                        ):
                            write_metrics_recursive(
                                eval_result["metrics"],
                                "metrics/" + dataset,
                                writer,
                                iteration,
                            )

        check_pointer.save("model_final", **arguments)

        total_training_time = int(
            time.time() - start_training_time
        )  # compute training time
        logger.info(
            f"Total training time: {datetime.timedelta(seconds=total_training_time)} ("
            f"{total_training_time / max_iter:.4f} s / it)"
        )
        return model
Example 9
def train_person_segmenter(
    model,
    train_loader,
    valid_loader,
    criterion,
    optimizer,
    scheduler,
    save_model_path: Path,
    n_epochs: int = 100,
):
    """

:param model:
:type model:
:param train_loader:
:type train_loader:
:param valid_loader:
:type valid_loader:
:param criterion:
:type criterion:
:param optimizer:
:type optimizer:
:param scheduler:
:type scheduler:
:param save_model_path:
:type save_model_path:
:param n_epochs:
:type n_epochs:
:return:
:rtype:
"""
    valid_loss_min = numpy.inf  # track change in validation loss
    assert n_epochs > 0, n_epochs
    E = tqdm(range(1, n_epochs + 1))
    for epoch in E:
        train_loss = 0.0
        valid_loss = 0.0
        dice_score = 0.0

        with TorchTrainSession(model):
            for data, target in tqdm(train_loader):
                data, target = (
                    data.to(global_torch_device()),
                    target.to(global_torch_device()),
                )
                optimizer.zero_grad()
                output, *_ = model(data)
                output = torch.sigmoid(output)
                loss = criterion(output, target)
                loss.backward()
                optimizer.step()
                train_loss += loss.item() * data.size(0)

        with TorchEvalSession(model):
            with torch.no_grad():
                for data, target in tqdm(valid_loader):
                    data, target = (
                        data.to(global_torch_device()),
                        target.to(global_torch_device()),
                    )
                    output, *_ = model(
                        data
                    )  # forward pass: compute predicted outputs by passing inputs to the model
                    output = torch.sigmoid(output)
                    loss = criterion(output,
                                     target)  # calculate the batch loss
                    valid_loss += loss.item() * data.size(
                        0)  # update average validation loss
                    dice_cof = intersection_over_union(
                        output.cpu().detach().numpy(),
                        target.cpu().detach().numpy())
                    dice_score += dice_cof * data.size(0)

        # calculate average losses
        train_loss = train_loss / len(train_loader.dataset)
        valid_loss = valid_loss / len(valid_loader.dataset)
        dice_score = dice_score / len(valid_loader.dataset)

        # print training/validation statistics
        E.set_description(f"Epoch: {epoch}"
                          f" Training Loss: {train_loss:.6f} "
                          f"Validation Loss: {valid_loss:.6f} "
                          f"Dice Score: {dice_score:.6f}")

        # save model if validation loss has decreased
        if valid_loss <= valid_loss_min:
            print(
                f"Validation loss decreased ({valid_loss_min:.6f} --> {valid_loss:.6f}).  Saving model ..."
            )
            torch.save(model.state_dict(), save_model_path)
            valid_loss_min = valid_loss

        scheduler.step()
        model, scheduler = reschedule_learning_rate(model, epoch, scheduler)

    return model
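intersection_over_union is not shown, and note that the loop above accumulates its result under the name dice_score even though IoU and the Dice coefficient are related but distinct metrics. A minimal numpy sketch of the IoU computation assumed here:

import numpy

def intersection_over_union(outputs: numpy.ndarray,
                            targets: numpy.ndarray,
                            threshold: float = 0.5,
                            eps: float = 1e-7) -> float:
    """IoU of thresholded predictions against binary targets (sketch)."""
    predictions = outputs > threshold
    truths = targets > threshold
    intersection = numpy.logical_and(predictions, truths).sum()
    union = numpy.logical_or(predictions, truths).sum()
    return float((intersection + eps) / (union + eps))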
Example 10
def main():
    pyplot.style.use("bmh")
    base_path = Path.home() / "Data" / "PennFudanPed"

    save_model_path = PROJECT_APP_PATH.user_data / 'models' / "penn_fudan_ped_seg.model"
    train_model = False
    eval_model = not train_model
    SEED = 87539842
    batch_size = 8
    num_workers = 1  # os.cpu_count()
    learning_rate = 0.01
    torch_seed(SEED)

    train_set = PennFudanDataset(base_path, Split.Training)
    train_loader = DataLoader(train_set,
                              batch_size=batch_size,
                              shuffle=True,
                              num_workers=num_workers)
    valid_loader = DataLoader(
        PennFudanDataset(base_path, Split.Validation),
        batch_size=batch_size,
        shuffle=False,
        num_workers=num_workers,
    )

    model = SkipHourglassFission(
        input_channels=train_set.predictor_shape[-1],
        output_heads=(train_set.response_shape[-1], ),
        encoding_depth=1,
    )
    model.to(global_torch_device())

    if train_model:
        if save_model_path.exists():
            model.load_state_dict(torch.load(str(save_model_path)))
            print("loading saved model")

        with TorchTrainSession(model):
            criterion = BCEDiceLoss(eps=1.0)
            optimiser = torch.optim.SGD(model.parameters(), lr=learning_rate)
            scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
                optimiser, T_max=7, eta_min=learning_rate / 100, last_epoch=-1)

            model = train_person_segmenter(
                model,
                train_loader,
                valid_loader,
                criterion,
                optimiser,
                scheduler,
                save_model_path,
            )

    if eval_model:
        if save_model_path.exists():
            model.load_state_dict(torch.load(str(save_model_path)))
            print("loading saved model")

        with TorchDeviceSession(global_torch_device(cuda_if_available=False),
                                model):
            with torch.no_grad():
                with TorchCacheSession():
                    with TorchEvalSession(model):
                        valid_masks = []
                        a = (350, 525)
                        tr = min(len(valid_loader.dataset) * 4, 2000)
                        probabilities = numpy.zeros((tr, *a),
                                                    dtype=numpy.float32)
                        for sample_i, (data, target) in enumerate(
                                tqdm(valid_loader)):
                            data = data.to(global_torch_device())
                            target = target.cpu().detach().numpy()
                            outputs, *_ = model(data)
                            outputs = torch.sigmoid(outputs).cpu().detach().numpy()
                            for p in range(data.shape[0]):
                                output, mask = outputs[p], target[p]
                                for m in mask:
                                    valid_masks.append(cv2_resize(m, a))
                                for probability in output:
                                    probabilities[sample_i, :, :] = cv2_resize(
                                        probability, a)
                                    sample_i += 1
                                if sample_i >= tr - 1:
                                    break
                            if sample_i >= tr - 1:
                                break

                        f, ax = pyplot.subplots(3, 3, figsize=(24, 12))

                        for i in range(3):
                            ax[0, i].imshow(valid_masks[i], vmin=0, vmax=1)
                            ax[0, i].set_title("Original", fontsize=14)

                            ax[1, i].imshow(valid_masks[i], vmin=0, vmax=1)
                            ax[1, i].set_title("Target", fontsize=14)

                            ax[2, i].imshow(probabilities[i], vmin=0, vmax=1)
                            ax[2, i].set_title("Prediction", fontsize=14)

                        pyplot.show()
Example 11
def train_d(
    model,
    train_loader,
    valid_loader,
    criterion,
    optimiser,
    scheduler,
    save_model_path,
    n_epochs=0,
):
    """

    Args:
      model:
      train_loader:
      valid_loader:
      criterion:
      optimiser:
      scheduler:
      save_model_path:
      n_epochs:

    Returns:

    """
    valid_loss_min = numpy.inf  # track change in validation loss
    E = tqdm(range(1, n_epochs + 1))
    for epoch in E:
        train_loss = 0.0
        valid_loss = 0.0
        dice_score = 0.0

        with TorchTrainSession(model):
            train_set = tqdm(train_loader, postfix={"train_loss": 0.0})
            for data, target in train_set:
                data, target = (
                    data.to(global_torch_device()),
                    target.to(global_torch_device()),
                )
                optimiser.zero_grad()
                output, *_ = model(data)
                output = torch.sigmoid(output)
                loss = criterion(output, target)
                loss.backward()
                optimiser.step()
                train_loss += loss.item() * data.size(0)
                train_set.set_postfix(ordered_dict={"train_loss": loss.item()})

        with TorchEvalSession(model):
            with torch.no_grad():
                validation_set = tqdm(valid_loader,
                                      postfix={
                                          "valid_loss": 0.0,
                                          "dice_score": 0.0
                                      })
                for data, target in validation_set:
                    data, target = (
                        data.to(global_torch_device()),
                        target.to(global_torch_device()),
                    )
                    # forward pass: compute predicted outputs by passing inputs to the model
                    output, *_ = model(data)
                    output = torch.sigmoid(output)
                    # calculate the batch loss
                    loss = criterion(output, target)
                    # update average validation loss
                    valid_loss += loss.item() * data.size(0)
                    dice_cof = intersection_over_union(
                        output.cpu().detach().numpy(),
                        target.cpu().detach().numpy())
                    dice_score += dice_cof * data.size(0)
                    validation_set.set_postfix(ordered_dict={
                        "valid_loss": loss.item(),
                        "dice_score": dice_cof
                    })

        # calculate average losses
        train_loss = train_loss / len(train_loader.dataset)
        valid_loss = valid_loss / len(valid_loader.dataset)
        dice_score = dice_score / len(valid_loader.dataset)

        # print training/validation statistics
        E.set_description(f"Epoch: {epoch}"
                          f" Training Loss: {train_loss:.6f} "
                          f"Validation Loss: {valid_loss:.6f} "
                          f"Dice Score: {dice_score:.6f}")

        # save model if validation loss has decreased
        if valid_loss <= valid_loss_min:
            print(
                f"Validation loss decreased ({valid_loss_min:.6f} --> {valid_loss:.6f}).  Saving model ..."
            )
            torch.save(model.state_dict(), save_model_path)
            valid_loss_min = valid_loss

        scheduler.step()
        model, scheduler = reschedule(model, epoch, scheduler)

    return model
Example 12
def main():
    dataset_root = Path.home() / "Data"
    base_path = ensure_existence(PROJECT_APP_PATH.user_data / 'maskrcnn')
    log_path = ensure_existence(PROJECT_APP_PATH.user_log / 'maskrcnn')
    export_root = ensure_existence(base_path / 'models')
    model_name = 'maskrcnn_pennfudanped'

    batch_size = 4
    num_epochs = 10
    optimiser_spec = GDKC(torch.optim.Adam, lr=3e-4)
    scheduler_spec = GDKC(
        torch.optim.lr_scheduler.StepLR,
        step_size=3,  # decrease the learning rate by 10x every 3 epochs
        gamma=0.1,
    )
    num_workers = os.cpu_count()
    torch_seed(3825)

    dataset = PennFudanDataset(dataset_root / "PennFudanPed",
                               Split.Training,
                               return_variant=ReturnVariant.all)
    dataset_validation = PennFudanDataset(
        dataset_root / "PennFudanPed",
        Split.Validation,
        return_variant=ReturnVariant.all,
    )
    split = SplitIndexer(len(dataset), validation=0.3, testing=0)

    split_indices = torch.randperm(split.total_num).tolist()

    data_loader = DataLoader(
        Subset(dataset, split_indices[:-split.validation_num]),
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers,
        collate_fn=collate_batch_fn,
    )

    data_loader_val = DataLoader(
        Subset(dataset_validation, split_indices[-split.validation_num:]),
        batch_size=1,
        shuffle=False,
        num_workers=num_workers,
        collate_fn=collate_batch_fn,
    )

    model = get_pretrained_instance_segmentation_maskrcnn(
        dataset.response_channels)
    optimiser = optimiser_spec(trainable_parameters(model))
    lr_scheduler = scheduler_spec(optimiser)

    if True:
        model = load_model(model_name=model_name,
                           model_directory=export_root)

    if True:
        with TorchTrainSession(model):
            with TensorBoardPytorchWriter(log_path / model_name) as writer:
                for epoch_i in tqdm(range(num_epochs), desc="Epoch #"):
                    maskrcnn_train_single_epoch(model=model,
                                                optimiser=optimiser,
                                                data_loader=data_loader,
                                                writer=writer)
                    lr_scheduler.step()  # update the learning rate
                    maskrcnn_evaluate(
                        model, data_loader_val, writer=writer
                    )  # evaluate on the validation dataset
                    save_model(model,
                               model_name=model_name,
                               save_directory=export_root)

    if True:
        with TorchEvalSession(model):  # put the model in evaluation mode
            img, _ = dataset_validation[0]  # pick one image from the test set

            with torch.no_grad():
                prediction = model([img.to(global_torch_device())])

            from matplotlib import pyplot
            pyplot.imshow(
                Image.fromarray(
                    img.mul(255).permute(1, 2, 0).byte().numpy()))
            pyplot.show()

            import cv2

            pyplot.imshow(
                Image.fromarray(prediction[0]["masks"][0, 0].mul(
                    255).byte().cpu().numpy()))
            pyplot.show()

            (boxes, labels, scores) = (
                prediction[0]["boxes"].to('cpu').numpy(),
                prediction[0]["labels"].to('cpu').numpy(),
                torch.sigmoid(prediction[0]["scores"]).to('cpu').numpy(),
            )

            from draugr.opencv_utilities import draw_bounding_boxes
            from draugr.torch_utilities.images.conversion import quick_to_pil_image

            indices = scores > 0.1

            cv2.namedWindow(model_name, cv2.WINDOW_NORMAL)
            cv2.imshow(
                model_name,
                draw_bounding_boxes(
                    quick_to_pil_image(img),
                    boxes[indices],
                    labels=labels[indices],
                    scores=scores[indices],
                    # categories=categories,
                ))

            cv2.waitKey()
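collate_batch_fn is referenced but not shown; detection pipelines generally cannot stack variable-sized images into one tensor, so the usual collate keeps each batch as tuples of lists (as in torchvision's detection reference code). A sketch matching that convention (an assumption about the helper used here):

def collate_batch_fn(batch):
    """Hypothetical detection collate: transpose a list of (image, target, ...)
    samples into tuples, leaving variable-sized images unstacked."""
    return tuple(zip(*batch))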
Example 13
def train_siamese(
    model: Module,
    optimiser: Optimizer,
    criterion: callable,
    *,
    writer: Writer = MockWriter(),
    train_number_epochs: int,
    data_dir: Path,
    train_batch_size: int,
    model_name: str,
    save_path: Path,
    save_best: bool = False,
    img_size: Tuple[int, int],
    validation_interval: int = 1,
):
    """
:param img_size:
:type img_size:
:param validation_interval:
:type validation_interval:
:param data_dir:
:type data_dir:
:param optimiser:
:type optimiser:
:param criterion:
:type criterion:
:param writer:
:type writer:
:param model_name:
:type model_name:
:param save_path:
:type save_path:
:param save_best:
:type save_best:
:param model:
:type model:
:param train_number_epochs:
:type train_number_epochs:
:param train_batch_size:
:type train_batch_size:
:return:
:rtype:
"""

    train_dataloader = DataLoader(
        PairDataset(
            data_path=data_dir,
            transform=transforms.Compose([
                transforms.Grayscale(),
                transforms.Resize(img_size),
                transforms.ToTensor(),
            ]),
            split=Split.Training,
        ),
        shuffle=True,
        num_workers=4,
        batch_size=train_batch_size,
    )

    valid_dataloader = DataLoader(
        PairDataset(
            data_path=data_dir,
            transform=transforms.Compose([
                transforms.Grayscale(),
                transforms.Resize(img_size),
                transforms.ToTensor(),
            ]),
            split=Split.Validation,
        ),
        shuffle=True,
        num_workers=4,
        batch_size=train_batch_size,
    )

    best = math.inf

    E = tqdm(range(0, train_number_epochs))
    batch_counter = count()

    for epoch in E:
        for tss in train_dataloader:
            batch_i = next(batch_counter)
            with TorchTrainSession(model):
                o = [t.to(global_torch_device()) for t in tss]
                optimiser.zero_grad()
                loss_contrastive = criterion(model(*o[:2]),
                                             o[2].to(dtype=torch.float))
                loss_contrastive.backward()
                optimiser.step()
                train_loss = loss_contrastive.cpu().item()
                writer.scalar("train_loss", train_loss, batch_i)
            if batch_i % validation_interval == 0:
                with TorchEvalSession(model):
                    for tsv in valid_dataloader:
                        ov = [t.to(global_torch_device()) for t in tsv]
                        v_o, fact = model(*ov[:2]), ov[2].to(dtype=torch.float)
                        valid_loss = criterion(v_o, fact).cpu().item()
                        valid_accuracy = (accuracy(distances=v_o,
                                                   is_diff=fact).cpu().item())
                        writer.scalar("valid_loss", valid_loss, batch_i)
                        if valid_loss < best:
                            best = valid_loss
                            print(f"new best {best}")
                            writer.blip("new_best", batch_i)
                            if save_best:
                                save_model_parameters(
                                    model,
                                    optimiser=optimiser,
                                    model_name=model_name,
                                    save_directory=save_path,
                                )
            E.set_description(
                f"Epoch number {epoch}, Current train loss {train_loss}, valid loss {valid_loss}, valid_accuracy {valid_accuracy}"
            )

    return model
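Here the criterion receives a model output that accuracy() treats as a distance, together with a same/different label, which is the shape of a classic contrastive loss. A hypothetical sketch of such a criterion (the margin value is an assumption):

import torch

class ContrastiveLoss(torch.nn.Module):
    """Hypothetical criterion: penalise large distances for same pairs (label 0)
    and distances inside the margin for different pairs (label 1)."""

    def __init__(self, margin: float = 1.0):
        super().__init__()
        self.margin = margin

    def forward(self, distances: torch.Tensor,
                is_diff: torch.Tensor) -> torch.Tensor:
        same_term = (1 - is_diff) * distances.pow(2)
        diff_term = is_diff * torch.clamp(self.margin - distances, min=0).pow(2)
        return 0.5 * (same_term + diff_term).mean()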
Example 14
def train_person_segmentor(
        model: torch.nn.Module,
        train_loader: torch.utils.data.DataLoader,
        valid_loader: torch.utils.data.DataLoader,
        criterion: callable,
        optimiser: torch.optim.Optimizer,
        *,
        save_model_path: Path,
        learning_rate: Number = 6e-2,
        scheduler: torch.optim.lr_scheduler._LRScheduler = None,
        n_epochs: int = 100,
        writer: ImageWriterMixin = MockWriter(),
):
    """

    :param model:
    :type model:
    :param train_loader:
    :type train_loader:
    :param valid_loader:
    :type valid_loader:
    :param criterion:
    :type criterion:
    :param optimiser:
    :type optimiser:
    :param scheduler:
    :type scheduler:
    :param save_model_path:
    :type save_model_path:
    :param n_epochs:
    :type n_epochs:
    :return:
    :rtype:"""
    valid_loss_min = numpy.inf  # track change in validation loss
    assert n_epochs > 0, n_epochs
    E = tqdm(range(1, n_epochs + 1))
    for epoch_i in E:
        train_loss = 0.0
        valid_loss = 0.0

        with TorchTrainSession(model):
            for data, target in tqdm(train_loader):
                output, *_ = model(data.to(global_torch_device()))
                loss = criterion(output,
                                 target.to(global_torch_device()).float())

                optimiser.zero_grad()
                loss.backward()
                optimiser.step()

                train_loss += loss.cpu().item() * data.size(0)

        with TorchEvalSession(model):
            with torch.no_grad():
                for data, target in tqdm(valid_loader):
                    target = target.float()
                    (
                        output,
                        *_,
                    ) = model(  # forward pass: compute predicted outputs by passing inputs to the model
                        data.to(global_torch_device()))
                    validation_loss = criterion(
                        output, target.to(
                            global_torch_device()))  # calculate the batch loss
                    writer.scalar(
                        "dice_validation",
                        dice_loss(output, target.to(global_torch_device())),
                    )

                    valid_loss += validation_loss.detach().cpu().item(
                    ) * data.size(0)  # update average validation loss
                writer.image("input", data, epoch_i)  # write the last batch
                writer.image("truth", target, epoch_i)  # write the last batch
                writer.image("prediction", torch.sigmoid(output),
                             epoch_i)  # write the last batch

        # calculate average losses
        train_loss = train_loss / len(train_loader.dataset)
        valid_loss = valid_loss / len(valid_loader.dataset)

        # save model if validation loss has decreased
        if valid_loss <= valid_loss_min:
            print(
                f"Validation loss decreased ({valid_loss_min:.6f} --> {valid_loss:.6f}).  Saving model ..."
            )
            torch.save(model.state_dict(), save_model_path)
            valid_loss_min = valid_loss

        if scheduler:
            scheduler.step()
            optimiser, scheduler = reschedule_learning_rate(
                model,
                optimiser,
                epoch_i,
                scheduler,
                starting_learning_rate=learning_rate,
            )

        # print training/validation statistics
        current_lr = next(iter(optimiser.param_groups))["lr"]
        E.set_description(f"Epoch: {epoch_i} "
                          f"Training Loss: {train_loss:.6f} "
                          f"Validation Loss: {valid_loss:.6f} "
                          f"Learning rate: {current_lr:.6f}")
        writer.scalar("training_loss", train_loss)
        writer.scalar("validation_loss", valid_loss)
        writer.scalar("learning_rate", current_lr)

    return model
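dice_loss above is logged but not defined in the snippet; a common soft-Dice formulation, sketched under the assumption that the model emits raw logits and the targets are binary masks:

import torch

def dice_loss(logits: torch.Tensor, targets: torch.Tensor,
              eps: float = 1e-7) -> torch.Tensor:
    """Sketch: 1 - soft Dice coefficient between sigmoid(logits) and targets."""
    probabilities = torch.sigmoid(logits)
    intersection = (probabilities * targets).sum()
    return 1 - (2 * intersection + eps) / (
        probabilities.sum() + targets.sum() + eps)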
Example 15
def main(
    base_path: Path = Path.home() / "Data" / "Datasets" / "PennFudanPed",
    train_model: bool = True,
    load_prev_model: bool = True,
    writer: Writer = TensorBoardPytorchWriter(PROJECT_APP_PATH.user_log /
                                              "instanced_person_segmentation" /
                                              f"{time.time()}"),
):
    """ """

    pyplot.style.use("bmh")

    save_model_path = (
        ensure_existence(PROJECT_APP_PATH.user_data / "models") /
        "instanced_penn_fudan_ped_seg.model")

    eval_model = not train_model
    SEED = 9221
    batch_size = 32
    num_workers = 0
    encoding_depth = 2
    learning_rate = 6e-6  # sequence 6e-2 6e-3 6e-4 6e-5

    seed_stack(SEED)

    train_set = PennFudanDataset(
        base_path,
        SplitEnum.training,
        return_variant=PennFudanDataset.PennFudanReturnVariantEnum.instanced,
    )

    train_loader = DataLoader(train_set,
                              batch_size=batch_size,
                              shuffle=True,
                              num_workers=num_workers)
    valid_loader = DataLoader(
        PennFudanDataset(
            base_path,
            SplitEnum.validation,
            return_variant=PennFudanDataset.PennFudanReturnVariantEnum.
            instanced,
        ),
        batch_size=batch_size,
        shuffle=False,
        num_workers=num_workers,
    )

    model = SkipHourglassFission(
        input_channels=train_set.predictor_shape[-1],
        output_heads=(train_set.response_shape[-1], ),
        encoding_depth=encoding_depth,
    )
    model.to(global_torch_device())

    if load_prev_model and save_model_path.exists():
        model.load_state_dict(torch.load(str(save_model_path)))
        print("loading saved model")

    if train_model:
        with TorchTrainSession(model):
            criterion = BCEDiceLoss()
            # optimiser = torch.optim.SGD(model.parameters(), lr=learning_rate)
            optimiser = torch.optim.Adam(model.parameters(), lr=learning_rate)
            # scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimiser, T_max=7, eta_min=learning_rate / 100, last_epoch=-1)

            model = train_person_segmentor(
                model,
                train_loader,
                valid_loader,
                criterion,
                optimiser,
                save_model_path=save_model_path,
                learning_rate=learning_rate,
                writer=writer,
            )

    if eval_model:
        validate_model(model, valid_loader)