Example #1
def valid_fn(
    model: nn.Module,
    loader: DataLoader,
    device: str,
    loss_fn: nn.Module,
    verbose: bool = True,
) -> dict:
    """Validation step.

    Args:
        model (nn.Module): model to validate
        loader (DataLoader): loader with data
        device (str): device to use for placing batches
        loss_fn (nn.Module): loss function, should be callable
        verbose (bool, optional): verbosity mode.
            Default is True.

    Returns:
        dict with metrics computed during the validation on loader
    """
    model.eval()

    metrics = {
        "loss": [],
        "gap": [],
        "accuracy": [],
    }

    with torch.no_grad(), tqdm(
        total=len(loader), desc="valid", disable=not verbose
    ) as progress:
        for _idx, batch in enumerate(loader):
            inputs, targets = t2d(batch, device)

            outputs = model(inputs, targets)
            loss = loss_fn(outputs, targets)

            _loss = loss.detach().item()
            metrics["loss"].append(_loss)

            classes = torch.argmax(outputs, 1)
            _acc = (classes == targets).float().mean().detach().item()
            metrics["accuracy"].append(_acc)

            confidences, predictions = torch.max(outputs, dim=1)
            _gap = gap(predictions, confidences, targets)
            metrics["gap"].append(_gap)

            progress.set_postfix_str(
                f"loss {_loss:.4f}, gap {_gap:.4f}, accuracy {_acc:.4f}"
            )
            progress.update(1)

            if _idx == DEBUG:
                break

    metrics["loss"] = np.mean(metrics["loss"])
    metrics["gap"] = np.mean(metrics["gap"])
    metrics["accuracy"] = np.mean(metrics["accuracy"])
    return metrics
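The examples in this collection rely on a few small project helpers (`t2d`, `zero_grad`, `gap`, and a `DEBUG` switch) whose definitions are not shown. The sketch below is only an assumption about what they might look like, inferred from how they are called above and below; the real project helpers may differ.

from typing import Any, Optional

import torch
from torch import optim

# Assumed debug switch: a batch index to stop at early, or None to run the full loader.
DEBUG: Optional[int] = None


def t2d(tensor: Any, device: str) -> Any:
    """Recursively move a tensor (or a list/tuple/dict of tensors) to the given device."""
    if isinstance(tensor, torch.Tensor):
        return tensor.to(device)
    if isinstance(tensor, (list, tuple)):
        return type(tensor)(t2d(item, device) for item in tensor)
    if isinstance(tensor, dict):
        return {key: t2d(value, device) for key, value in tensor.items()}
    return tensor


def zero_grad(optimizer: optim.Optimizer) -> None:
    """Reset gradients of all parameters handled by the optimizer.

    The train steps below call this every batch; if the real helper is this naive,
    accumulation_steps > 1 would not actually accumulate gradients.
    """
    optimizer.zero_grad()


def gap(predictions: torch.Tensor, confidences: torch.Tensor, targets: torch.Tensor) -> float:
    """Global Average Precision over a single batch (assumed semantics of `gap`)."""
    order = torch.argsort(confidences, descending=True)
    correct = (predictions[order] == targets[order]).float()
    ranks = torch.arange(1, len(correct) + 1, device=correct.device, dtype=torch.float32)
    precision_at_rank = torch.cumsum(correct, dim=0) / ranks
    return (precision_at_rank * correct).sum().item() / len(correct)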
Example #2
def train_fn(
    model,
    loader,
    device,
    loss_fn,
    optimizer,
    scheduler=None,
    accumulation_steps=1,
    verbose=True,
):
    """Train step.
    Args:
        model (nn.Module): model to train
        loader (DataLoader): loader with data
        device (str or torch.device): device to use for placing batches
        loss_fn (nn.Module): loss function, should be callable
        optimizer (torch.optim.Optimizer): model parameters optimizer
        scheduler ([type], optional): batch scheduler to use.
            Default is `None`.
        accumulation_steps (int, optional): number of steps to accumulate gradients.
            Default is `1`.
        verbose (bool, optional): verbosity mode.
            Default is True.
    Returns:
        dict with metrics computed during the training on loader
    """
    model.train()
    metrics = {"loss": 0.0}
    with tqdm(total=len(loader), desc="train",
              disable=not verbose) as progress:
        for idx, batch in enumerate(loader):
            batch = t2d(batch, device)

            zero_grad(optimizer)

            target_availabilities = batch["target_availabilities"].unsqueeze(
                -1)
            targets = batch["target_positions"]
            outputs = model(batch["image"]).reshape(targets.shape)
            loss = (loss_fn(outputs, targets) * target_availabilities).mean()

            _loss = loss.detach().item()
            metrics["loss"] += _loss

            loss.backward()

            progress.set_postfix_str(f"loss - {_loss:.5f}")
            progress.update(1)

            if (idx + 1) % accumulation_steps == 0:
                optimizer.step()
                if scheduler is not None:
                    scheduler.step()

            if idx == DEBUG:
                break

    metrics["loss"] /= idx + 1
    return metrics
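A hedged usage sketch for the train step above: the toy dataset and model below are placeholders invented for illustration (the real project uses the Lyft l5kit rasterized datasets), and the `t2d`/`zero_grad`/`DEBUG` helpers are the assumed ones sketched after Example #1.

import torch
from torch import nn, optim
from torch.utils.data import DataLoader, Dataset


class ToyLyftDataset(Dataset):
    """Stand-in dataset yielding dicts with the keys the train step above reads."""

    def __len__(self) -> int:
        return 8

    def __getitem__(self, index: int) -> dict:
        return {
            "image": torch.randn(3, 64, 64),
            "target_positions": torch.randn(50, 2),
            "target_availabilities": torch.ones(50),
        }


class ToyModel(nn.Module):
    """Minimal regressor mapping a rasterized image to 50 future (x, y) offsets."""

    def __init__(self) -> None:
        super().__init__()
        self.net = nn.Sequential(nn.Flatten(), nn.Linear(3 * 64 * 64, 50 * 2))

    def forward(self, images: torch.Tensor) -> torch.Tensor:
        return self.net(images)


model = ToyModel()
loader = DataLoader(ToyLyftDataset(), batch_size=4)
optimizer = optim.Adam(model.parameters(), lr=1e-3)
# element-wise loss so it can be masked by target_availabilities inside train_fn
loss_fn = nn.MSELoss(reduction="none")

history = train_fn(model, loader, "cpu", loss_fn, optimizer)
print(history)  # e.g. {"loss": ...}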
Example #3
def valid_fn(model, loader, device, loss_fn, verbose=True):
    """Validation step.

    Args:
        model (nn.Module): model to validate
        loader (DataLoader): loader with data
        device (str or torch.device): device to use for placing batches
        loss_fn (nn.Module): loss function, should be callable
        verbose (bool, optional): verbosity mode.
            Default is True.

    Returns:
        dict with metrics computed during the validation on loader
    """
    model.eval()
    metrics = {"loss": 0.0}
    with torch.no_grad(), tqdm(total=len(loader),
                               desc="valid",
                               disable=not verbose) as progress:
        for idx, batch in enumerate(loader):
            images, targets, target_availabilities = t2d(
                (
                    batch["image"],
                    batch["target_positions"],
                    batch["target_availabilities"],
                ),
                device,
            )

            predictions, confidences = model(images)
            loss = loss_fn(targets, predictions, confidences,
                           target_availabilities)

            _loss = loss.detach().item()
            metrics["loss"] += _loss

            progress.set_postfix_str(f"loss - {_loss:.5f}")
            progress.update(1)

            if idx == DEBUG:
                break

    metrics["loss"] /= idx + 1
    return metrics
Example #4
def train_fn(
    model: nn.Module,
    loader: DataLoader,
    device: str,
    loss_fn: nn.Module,
    optimizer: optim.Optimizer,
    scheduler=None,
    accumulation_steps: int = 1,
    verbose: bool = True,
) -> dict:
    """Train step.

    Args:
        model (nn.Module): model to train
        loader (DataLoader): loader with data
        device (str): device to use for placing batches
        loss_fn (nn.Module): loss function, should be callable
        optimizer (optim.Optimizer): model parameters optimizer
        scheduler ([type], optional): batch scheduler to use.
            Default is `None`.
        accumulation_steps (int, optional): number of steps to accumulate gradients.
            Default is `1`.
        verbose (bool, optional): verbosity mode.
            Default is True.

    Returns:
        dict with metrics computed during the training on loader
    """
    model.train()

    metrics = {
        "loss": [],
        "gap": [],
        "accuracy": [],
    }

    with tqdm(total=len(loader), desc="train",
              disable=not verbose) as progress:
        for _idx, batch in enumerate(loader):
            inputs, targets = t2d(batch, device)

            zero_grad(optimizer)

            outputs = model(inputs, targets)
            loss = loss_fn(outputs, targets)

            _loss = loss.detach().item()
            metrics["loss"].append(_loss)

            classes = torch.argmax(outputs, 1)
            _acc = (classes == targets).float().mean().detach().item()
            metrics["accuracy"].append(_acc)

            confidences, predictions = torch.max(outputs, dim=1)
            _gap = gap(predictions, confidences, targets)
            metrics["gap"].append(_gap)

            loss.backward()

            progress.set_postfix_str(
                f"loss {_loss:.4f}, gap {_gap:.4f}, accuracy {_acc:.4f}")

            if (_idx + 1) % accumulation_steps == 0:
                optimizer.step()
                if scheduler is not None:
                    scheduler.step()

            progress.update(1)

            if _idx == DEBUG:
                break

    metrics["loss"] = np.mean(metrics["loss"])
    metrics["gap"] = np.mean(metrics["gap"])
    metrics["accuracy"] = np.mean(metrics["accuracy"])
    return metrics
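For the classification variant (Examples #1 and #4), the loader is expected to yield `(inputs, targets)` tuples and the model's forward to accept the targets as a second argument (as margin-based heads do). A toy usage sketch under those assumptions, again relying on the helper sketch after Example #1:

import torch
from torch import nn, optim
from torch.utils.data import DataLoader, TensorDataset

# Toy classification data: 64 samples, 10 features, 3 classes (illustration only).
features = torch.randn(64, 10)
labels = torch.randint(0, 3, (64,))
train_loader = DataLoader(TensorDataset(features, labels), batch_size=8, shuffle=True)
valid_loader = DataLoader(TensorDataset(features, labels), batch_size=8)


class ToyClassifier(nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.fc = nn.Linear(10, 3)

    def forward(self, inputs: torch.Tensor, targets: torch.Tensor = None) -> torch.Tensor:
        # targets are accepted because the steps above call model(inputs, targets);
        # this toy head simply ignores them.
        return self.fc(inputs)


model = ToyClassifier()
optimizer = optim.SGD(model.parameters(), lr=1e-2)
loss_fn = nn.CrossEntropyLoss()

print(train_fn(model, train_loader, "cpu", loss_fn, optimizer))
print(valid_fn(model, valid_loader, "cpu", loss_fn))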
Example #5
def train_fn(
    model,
    loader,
    device,
    loss_fn,
    optimizer,
    scheduler=None,
    accumulation_steps=1,
    verbose=True,
    tensorboard_logger=None,
):
    """Train step.
    Args:
        model (nn.Module): model to train
        loader (DataLoader): loader with data
        device (str or torch.device): device to use for placing batches
        loss_fn (nn.Module): loss function, should be callable
        optimizer (torch.optim.Optimizer): model parameters optimizer
        scheduler ([type], optional): batch scheduler to use.
            Default is `None`.
        accumulation_steps (int, optional): number of steps to accumulate gradients.
            Default is `1`.
        verbose (bool, optional): verbosity mode.
            Default is True.
        tensorboard_logger ([type], optional): logger with a `.metric(name, value, step)`
            method for per-batch metrics. Default is `None`.
    Returns:
        dict with metrics computed during the training on loader
    """
    model.train()
    metrics = {"loss": 0.0}
    with tqdm(total=len(loader), desc="train",
              disable=not verbose) as progress:
        for idx, batch in enumerate(loader):
            (
                images,
                targets,
                target_availabilities,
                squares,
                months,
                weekdays,
                hours,
            ) = t2d(
                (
                    batch["image"],
                    batch["target_positions"],
                    batch["target_availabilities"],
                    batch["square_category"],
                    batch["time_month"],
                    batch["time_weekday"],
                    batch["time_hour"],
                ),
                device,
            )

            zero_grad(optimizer)

            predictions, confidences = model(images, squares, months, weekdays,
                                             hours)
            loss = loss_fn(targets, predictions, confidences,
                           target_availabilities)

            _loss = loss.detach().item()
            metrics["loss"] += _loss

            if tensorboard_logger is not None:
                tensorboard_logger.metric("loss", _loss, idx)

            loss.backward()

            progress.set_postfix_str(f"loss - {_loss:.5f}")
            progress.update(1)

            if (idx + 1) % accumulation_steps == 0:
                optimizer.step()
                if scheduler is not None:
                    scheduler.step()

            if idx == DEBUG:
                break

    metrics["loss"] /= idx + 1
    return metrics
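The `tensorboard_logger` passed above (and in Examples #7 and #8) only needs to expose a `.metric(name, value, step)` method and, for the image logging in Example #8, a raw `.writer`. A minimal wrapper along those lines, stated as an assumption about the project helper:

from torch.utils.tensorboard import SummaryWriter


class TensorboardLogger:
    """Thin wrapper exposing the interface the train steps in this collection use."""

    def __init__(self, logdir: str) -> None:
        self.writer = SummaryWriter(log_dir=logdir)

    def metric(self, name: str, value: float, step: int) -> None:
        """Log a single scalar under `name` at the given global step."""
        self.writer.add_scalar(name, value, global_step=step)

    def close(self) -> None:
        self.writer.close()


# usage: train_fn(..., tensorboard_logger=TensorboardLogger("logs/experiment"))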
Example #6
def valid_fn(model, loader, device, ground_truth_file, logdir, verbose=True):
    """Validation step.

    Args:
        model (nn.Module): model to evaluate
        loader (DataLoader): loader with data
        device (str or torch.device): device to use for placing batches
        ground_truth_file (str or Path): path to the ground truth csv passed
            to `compute_metrics_csv`
        logdir (Path): directory to write the predictions csv to
        verbose (bool, optional): verbosity mode.
            Default is True.

    Returns:
        dict with the score (negative multi-log-likelihood) computed during the validation on loader
    """
    model.eval()

    future_coords_offsets_pd = []
    timestamps = []
    confidences_list = []
    agent_ids = []

    with torch.no_grad(), tqdm(
        total=len(loader), desc="valid", disable=not verbose
    ) as progress:
        for idx, batch in enumerate(loader):
            images, acceleration = t2d(
                [batch["image"], batch["xy_acceleration"]], device
            )

            predictions, confidences = model(images, acceleration)

            _gt = batch["target_positions"].cpu().numpy().copy()
            predictions = predictions.cpu().numpy().copy()
            world_from_agents = batch["world_from_agent"].numpy()
            centroids = batch["centroid"].numpy()

            # use a separate index so the outer batch `idx` (used for the DEBUG
            # early stop below) is not shadowed
            for sample in range(len(predictions)):
                for mode in range(3):
                    predictions[sample, mode, :, :] = (
                        transform_points(
                            predictions[sample, mode, :, :],
                            world_from_agents[sample],
                        )
                        - centroids[sample][:2]
                    )
                _gt[sample, :, :] = (
                    transform_points(_gt[sample, :, :], world_from_agents[sample])
                    - centroids[sample][:2]
                )

            future_coords_offsets_pd.append(predictions.copy())
            confidences_list.append(confidences.cpu().numpy().copy())
            timestamps.append(batch["timestamp"].numpy().copy())
            agent_ids.append(batch["track_id"].numpy().copy())

            progress.update(1)

            if idx == DEBUG:
                break

    predictions_file = str(logdir / "preds_validate_chopped.csv")
    write_pred_csv(
        predictions_file,
        timestamps=np.concatenate(timestamps),
        track_ids=np.concatenate(agent_ids),
        coords=np.concatenate(future_coords_offsets_pd),
        confs=np.concatenate(confidences_list),
    )

    metrics = compute_metrics_csv(
        ground_truth_file,
        predictions_file,
        [neg_multi_log_likelihood],
    )

    return {"score": metrics["neg_multi_log_likelihood"]}
Example #7
def train_fn(
    model,
    loader,
    device,
    loss_fn,
    optimizer,
    scheduler=None,
    accumulation_steps=1,
    verbose=True,
    tensorboard_logger=None,
    logdir=None,
    validation_fn=None,
):
    """Train step.

    Args:
        model (nn.Module): model to train
        loader (DataLoader): loader with data
        device (str or torch.device): device to use for placing batches
        loss_fn (nn.Module): loss function, should be callable
        optimizer (torch.optim.Optimizer): model parameters optimizer
        scheduler ([type], optional): batch scheduler to use.
            Default is `None`.
        accumulation_steps (int, optional): number of steps to accumulate gradients.
            Default is `1`.
        verbose (bool, optional): verbosity mode.
            Default is True.
        tensorboard_logger ([type], optional): logger with a `.metric(name, value, step)`
            method for per-batch metrics. Default is `None`.
        logdir (Path, optional): directory to save intermediate checkpoints to.
            Default is `None`.
        validation_fn (Callable, optional): function called periodically during
            training to compute a validation score; receives `model` and `device`.
            Default is `None`.

    Returns:
        dict with metrics computed during the training on loader
    """
    model.train()
    metrics = {"loss": 0.0}
    n_batches = len(loader)

    indices_to_save = [int(n_batches * pcnt) for pcnt in np.arange(0.1, 1, 0.1)]
    last_score = 0.0

    with tqdm(total=len(loader), desc="train", disable=not verbose) as progress:
        for idx, batch in enumerate(loader):
            (images, targets, target_availabilities, acceleration,) = t2d(
                (
                    batch["image"],
                    batch["target_positions"],
                    batch["target_availabilities"],
                    batch["xy_acceleration"],
                ),
                device,
            )

            zero_grad(optimizer)

            predictions, confidences = model(images, acceleration)
            loss = loss_fn(targets, predictions, confidences, target_availabilities)

            _loss = loss.detach().item()

            metrics["loss"] += _loss

            if (idx + 1) % 30_000 == 0 and validation_fn is not None:
                score = validation_fn(model=model, device=device)
                model.train()
                last_score = score

                if logdir is not None:
                    checkpoint = make_checkpoint("train", idx + 1, model)
                    save_checkpoint(checkpoint, logdir, f"train_{idx}.pth")
            else:
                score = None

            if tensorboard_logger is not None:
                tensorboard_logger.metric("loss", _loss, idx)

            loss.backward()

            progress.set_postfix_str(
                f"loss - {_loss:.5f}, score - {last_score:.5f}"
            )
            progress.update(1)

            if (idx + 1) % accumulation_steps == 0:
                optimizer.step()
                if scheduler is not None:
                    scheduler.step()

            if idx == DEBUG:
                break

    metrics["loss"] /= idx + 1
    return metrics
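`make_checkpoint` and `save_checkpoint` (used here and in Example #10) are project helpers that are not shown; the sketch below is one plausible minimal version, an assumption consistent with how they are called:

from pathlib import Path
from typing import Optional

import torch
from torch import nn, optim


def make_checkpoint(stage: str, epoch: int, model: nn.Module,
                    optimizer: Optional[optim.Optimizer] = None) -> dict:
    """Collect model (and optionally optimizer) state into a plain dict."""
    checkpoint = {
        "stage": stage,
        "epoch": epoch,
        "model_state_dict": model.state_dict(),
    }
    if optimizer is not None:
        checkpoint["optimizer_state_dict"] = optimizer.state_dict()
    return checkpoint


def save_checkpoint(checkpoint: dict, logdir: Path, name: str) -> None:
    """Write a checkpoint dict to `<logdir>/<name>`, creating the directory if needed."""
    logdir = Path(logdir)
    logdir.mkdir(parents=True, exist_ok=True)
    torch.save(checkpoint, logdir / name)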
Example #8
def train_fn(
    model,
    loader,
    device,
    loss_fn,
    optimizer,
    scheduler=None,
    accumulation_steps=1,
    verbose=True,
    tensorboard_logger=None,
    # logdir=None,
    # validation_fn=None,
):
    """Train step.

    Args:
        model (nn.Module): model to train
        loader (DataLoader): loader with data
        device (str or torch.device): device to use for placing batches
        loss_fn (nn.Module): loss function, should be callable
        optimizer (torch.optim.Optimizer): model parameters optimizer
        scheduler ([type], optional): batch scheduler to use.
            Default is `None`.
        accumulation_steps (int, optional): number of steps to accumulate gradients.
            Default is `1`.
        verbose (bool, optional): verbosity mode.
            Default is True.
        tensorboard_logger ([type], optional): logger with a `.metric(name, value, step)`
            method and a raw `.writer` for per-batch logging. Default is `None`.

    Returns:
        dict with metrics computed during the training on loader
    """
    model.train()
    metrics = {"regression_loss": 0.0, "mask_loss": 0.0, "loss": 0.0}
    n_batches = len(loader)

    indices_to_save = [
        int(n_batches * pcnt) for pcnt in np.arange(0.1, 1, 0.1)
    ]
    # last_score = 0.0

    with tqdm(total=len(loader), desc="train",
              disable=not verbose) as progress:
        for idx, batch in enumerate(loader):
            (images, targets, target_availabilities, squares, masks) = t2d(
                (
                    batch["image"],
                    batch["target_positions"],
                    batch["target_availabilities"],
                    batch["square_category"],
                    batch["mask"],
                ),
                device,
            )

            zero_grad(optimizer)

            predictions, confidences, masks_logits = model(images, squares)
            rloss = loss_fn(targets, predictions, confidences,
                            target_availabilities)
            mloss = 1e4 * F.binary_cross_entropy_with_logits(
                masks_logits, masks)
            loss = rloss + mloss

            _rloss = rloss.detach().item()
            _mloss = mloss.detach().item()
            _loss = loss.detach().item()
            metrics["regression_loss"] += _rloss
            metrics["mask_loss"] += _mloss
            metrics["loss"] += _loss

            # if (idx + 1) % 30_000 == 0 and validation_fn is not None:
            #     score = validation_fn(model=model, device=device)
            #     model.train()
            #     last_score = score

            #     if logdir is not None:
            #         checkpoint = make_checkpoint("train", idx + 1, model)
            #         save_checkpoint(checkpoint, logdir, f"train_{idx}.pth")
            # else:
            #     score = None

            if tensorboard_logger is not None:
                tensorboard_logger.metric("regression_loss", _rloss, idx)
                tensorboard_logger.metric("mask_loss", _mloss, idx)
                tensorboard_logger.metric("loss", _loss, idx)

                if (idx + 1) % 5_000 == 0:
                    # masks_gt - (bs)x(1)x(h)x(w)
                    # masks - (bs)x(1)x(h)x(w)
                    tensorboard_logger.writer.add_images(
                        "gt_vs_mask",
                        torch.cat([masks, torch.sigmoid(masks_logits)],
                                  dim=-1),
                        idx,
                    )

            loss.backward()

            progress.set_postfix_str(
                f"rloss - {_rloss:.5f}, "
                f"mloss - {_mloss:.5f}, "
                f"loss - {_loss:.5f}"
                # f"loss - {_loss:.5f}"
                # f"loss - {_loss:.5f}, score - {last_score:.5f}"
            )
            progress.update(1)

            if (idx + 1) % accumulation_steps == 0:
                optimizer.step()
                if scheduler is not None:
                    scheduler.step()

            if idx == DEBUG:
                break

    metrics["regression_loss"] /= idx + 1
    metrics["mask_loss"] /= idx + 1
    metrics["loss"] /= idx + 1
    return metrics
Example #9
test_dataloader = DataLoader(test_dataset,
                             shuffle=False,
                             batch_size=32,
                             num_workers=30)

model.eval()
torch.set_grad_enabled(False)

# store information for evaluation
future_coords_offsets_pd = []
ground_truth = []
timestamps = []
confidences_list = []
agent_ids = []

with tqdm(total=len(test_dataloader)) as progress:
    for batch in test_dataloader:
        # inputs = batch["image"].to(device), batch["xy_acceleration"].to(device)

        preds, confidences = model(batch["image"].to(device),
                                   batch["xy_acceleration"].to(device))

        # TODO: fix coordinates
        _gt = batch["target_positions"].cpu().numpy().copy()
        preds = preds.cpu().numpy().copy()
        world_from_agents = batch["world_from_agent"].numpy()
        centroids = batch["centroid"].numpy()
        for idx in range(len(preds)):
            for mode in range(n_trajectories):
                # project agent-centric predictions back into world coordinates
                # (completed here following the same transform as in Example #6)
                preds[idx, mode, :, :] = (
                    transform_points(preds[idx, mode, :, :],
                                     world_from_agents[idx])
                    - centroids[idx][:2]
                )
Example #10
def train_fn(
    model,
    loader,
    device,
    loss_fn,
    optimizer,
    scheduler=None,
    accumulation_steps=1,
    verbose=True,
    logdir=None,
):
    """Train step.
    Args:
        model (nn.Module): model to train
        loader (DataLoader): loader with data
        device (str or torch.device): device to use for placing batches
        loss_fn (nn.Module): loss function, should be callable
        optimizer (torch.optim.Optimizer): model parameters optimizer
        scheduler ([type], optional): batch scheduler to use.
            Default is `None`.
        accumulation_steps (int, optional): number of steps to accumulate gradients.
            Default is `1`.
        verbose (bool, optional): verbosity mode.
            Default is True.
        logdir (Path, optional): directory to save intermediate checkpoints to.
            Default is `None`.
    Returns:
        dict with metrics computed during the training on loader
    """
    model.train()
    metrics = {"loss": 0.0}
    save_batches = [
        int(len(loader) * pcnt) for pcnt in np.arange(0.1, 1.0, 0.1)
    ]
    with tqdm(total=len(loader), desc="train",
              disable=not verbose) as progress:
        for idx, batch in enumerate(loader):
            images, targets, target_availabilities = t2d(
                (
                    batch["image"],
                    batch["target_positions"],
                    batch["target_availabilities"],
                ),
                device,
            )

            zero_grad(optimizer)

            predictions, confidences = model(images)
            loss = loss_fn(targets, predictions, confidences,
                           target_availabilities)

            _loss = loss.detach().item()
            metrics["loss"] += _loss

            loss.backward()

            progress.set_postfix_str(f"loss - {_loss:.5f}")
            progress.update(1)

            if (idx + 1) in save_batches and logdir is not None:
                checkpoint = make_checkpoint(stage="_train",
                                             epoch=(idx + 1),
                                             model=model,
                                             optimizer=optimizer)
                save_checkpoint(checkpoint, logdir / "_train",
                                f"batch_{idx+1}.pth")

            if (idx + 1) % accumulation_steps == 0:
                optimizer.step()
                if scheduler is not None:
                    scheduler.step()

            if idx == DEBUG:
                break

    metrics["loss"] /= idx + 1
    return metrics
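Finally, a hedged sketch of an outer epoch loop that ties a train step and a valid step from this collection together; `model`, `train_loader`, `valid_loader`, and `loss_fn` are placeholders assumed to be built elsewhere, and the checkpoint helpers are the assumed ones sketched after Example #7.

from pathlib import Path

import torch
from torch import optim

n_epochs = 10
device = "cuda" if torch.cuda.is_available() else "cpu"
logdir = Path("logs/experiment")

model = model.to(device)
optimizer = optim.AdamW(model.parameters(), lr=1e-3)
# per-batch scheduler, matching the scheduler.step() call inside the train steps
scheduler = optim.lr_scheduler.OneCycleLR(
    optimizer, max_lr=1e-3, epochs=n_epochs, steps_per_epoch=len(train_loader)
)

best_loss = float("inf")
for epoch in range(1, n_epochs + 1):
    train_metrics = train_fn(
        model, train_loader, device, loss_fn, optimizer,
        scheduler=scheduler, logdir=logdir,
    )
    valid_metrics = valid_fn(model, valid_loader, device, loss_fn)
    print(f"epoch {epoch}: "
          f"train loss {train_metrics['loss']:.5f}, "
          f"valid loss {valid_metrics['loss']:.5f}")

    if valid_metrics["loss"] < best_loss:
        best_loss = valid_metrics["loss"]
        checkpoint = make_checkpoint("best", epoch, model, optimizer)
        save_checkpoint(checkpoint, logdir, "best.pth")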