Example #1
import time
from typing import Optional

import numpy as np
import torch

# Project-internal helpers; import paths assumed from the DeePyMoD package layout.
from deepymod.model.deepmod import DeepMoD
from deepymod.utils.tensorboard import Tensorboard
from deepymod.utils.output import progress


def train_bayes_MSE_optim(model: DeepMoD,
                          data: torch.Tensor,
                          target: torch.Tensor,
                          optimizer,
                          sparsity_scheduler,
                          split: float = 0.8,
                          log_dir: Optional[str] = None,
                          max_iterations: int = 10000,
                          write_iterations: int = 25,
                          **convergence_kwargs) -> None:
    """Stops training when it reaches minimum MSE.

    Args:
        model (DeepMoD): [description]
        data (torch.Tensor): [description]
        target (torch.Tensor): [description]
        optimizer ([type]): [description]
        sparsity_scheduler ([type]): [description]
        log_dir (Optional[str], optional): [description]. Defaults to None.
        max_iterations (int, optional): [description]. Defaults to 10000.
    """
    start_time = time.time()
    board = Tensorboard(log_dir)  # initializing tb board

    # Splitting data, assumes data is already randomized
    n_train = int(split * data.shape[0])
    n_test = data.shape[0] - n_train
    data_train, data_test = torch.split(data, [n_train, n_test], dim=0)
    target_train, target_test = torch.split(target, [n_train, n_test], dim=0)

    # Training
    print(
        '| Iteration | Progress | Time remaining |     Loss |      MSE |      Reg |    L1 norm |'
    )
    for iteration in range(max_iterations + 1):
        # ================== Training Model ============================
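        # Forward pass: network prediction plus the time derivatives and
        # library matrices (one list entry per output).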
        prediction, time_derivs, thetas = model(data_train)

        MSE = torch.mean((prediction - target_train)**2,
                         dim=0)  # loss per output
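        # Residual of the discovered equation per output; in this trainer it
        # is only logged and does not enter the training loss.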
        Reg = torch.stack([
            torch.mean((dt - theta @ coeff_vector)**2)
            for dt, theta, coeff_vector in zip(
                time_derivs, thetas,
                model.constraint_coeffs(scaled=False, sparse=True))
        ])
        # Uncertainty-weighted MSE (s = learned log-variance per output):
        # exp(-s) * MSE + s, summed over the outputs
        loss = torch.sum(torch.exp(-model.s[:, 0]) * MSE + model.s[:, 0])

        # Optimizer step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if iteration % write_iterations == 0:
            # ================== Validation costs ================
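            # The forward pass stays outside torch.no_grad(): the library
            # builds the time derivatives via autograd.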
            prediction_test, coordinates = model.func_approx(data_test)
            time_derivs_test, thetas_test = model.library(
                (prediction_test, coordinates))
            with torch.no_grad():
                MSE_test = torch.mean((prediction_test - target_test)**2,
                                      dim=0)  # loss per output
                Reg_test = torch.stack([
                    torch.mean((dt - theta @ coeff_vector)**2)
                    for dt, theta, coeff_vector in zip(
                        time_derivs_test, thetas_test,
                        model.constraint_coeffs(scaled=False, sparse=True))
                ])
                loss_test = torch.sum(MSE_test + Reg_test)

            # ====================== Logging =======================
            _ = model.sparse_estimator(
                thetas, time_derivs
            )  # calculating l1 adjusted coeffs but not setting mask
            estimator_coeff_vectors = model.estimator_coeffs()
            l1_norm = torch.sum(torch.abs(
                torch.cat(model.constraint_coeffs(sparse=True, scaled=True),
                          dim=1)),
                                dim=0)
            progress(iteration, start_time, max_iterations, loss.item(),
                     torch.sum(MSE).item(),
                     torch.sum(Reg).item(),
                     torch.sum(l1_norm).item())
            board.write(iteration,
                        loss,
                        MSE,
                        Reg,
                        l1_norm,
                        model.constraint_coeffs(sparse=True, scaled=True),
                        model.constraint_coeffs(sparse=True, scaled=False),
                        estimator_coeff_vectors,
                        MSE_test=MSE_test,
                        Reg_test=Reg_test,
                        loss_test=loss_test)

            # ================== Sparsity update =============
            # Updating sparsity and/or convergence
            sparsity_scheduler(iteration, torch.sum(MSE_test), model,
                               optimizer)

            if sparsity_scheduler.apply_sparsity:
                with torch.no_grad():
                    model.constraint.sparsity_masks = model.sparse_estimator(
                        thetas, time_derivs)
                break
    board.close()
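
A call to this trainer could look as follows. This is a minimal sketch: the import paths and constructor signatures are assumptions based on the DeePyMoD examples, X and y stand in for the (already shuffled) coordinate and measurement tensors, and the trainer additionally assumes the model exposes the learned log-variance parameter s used above.

from deepymod.model.func_approx import NN
from deepymod.model.library import Library1D
from deepymod.model.constraint import LeastSquares
from deepymod.model.sparse_estimators import Threshold
from deepymod.training.sparsity_scheduler import TrainTestPeriodic

network = NN(2, [30, 30, 30, 30], 1)             # (x, t) -> u
library = Library1D(poly_order=2, diff_order=3)  # candidate terms
model = DeepMoD(network, library, Threshold(0.1), LeastSquares())

optimizer = torch.optim.Adam(model.parameters(), betas=(0.99, 0.99), amsgrad=True)
sparsity_scheduler = TrainTestPeriodic(periodicity=50, patience=200, delta=1e-5)

train_bayes_MSE_optim(model, X, y, optimizer, sparsity_scheduler,
                      log_dir='runs/bayes_mse/', max_iterations=5000)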
Example #2
from sklearn.linear_model import BayesianRidge


def train_bayes_full_optim(model: DeepMoD,
                           data: torch.Tensor,
                           target: torch.Tensor,
                           optimizer,
                           sparsity_scheduler,
                           split: float = 0.8,
                           log_dir: Optional[str] = None,
                           max_iterations: int = 10000,
                           write_iterations: int = 25,
                           **convergence_kwargs) -> None:
    """Stops training when it reaches minimum MSE.

    Args:
        model (DeepMoD): [description]
        data (torch.Tensor): [description]
        target (torch.Tensor): [description]
        optimizer ([type]): [description]
        sparsity_scheduler ([type]): [description]
        log_dir (Optional[str], optional): [description]. Defaults to None.
        max_iterations (int, optional): [description]. Defaults to 10000.
    """
    start_time = time.time()
    board = Tensorboard(log_dir)  # initializing tb board

    # Splitting data, assumes data is already randomized
    n_train = int(split * data.shape[0])
    n_test = data.shape[0] - n_train
    data_train, data_test = torch.split(data, [n_train, n_test], dim=0)
    target_train, target_test = torch.split(target, [n_train, n_test], dim=0)

    # Training
    print(
        '| Iteration | Progress | Time remaining |     Loss |      MSE |      Reg |    L1 norm |'
    )
    for iteration in range(max_iterations + 1):
        # ================== Training Model ============================
        prediction, time_derivs, thetas = model(data_train)
        MSE = torch.mean((prediction - target_train)**2,
                         dim=0)  # loss per output

        t = time_derivs[0]
        Theta = thetas[0]
        if iteration == 0:
            # Initialize the learned log-precisions in model.s from a
            # non-regularized Bayesian ridge fit (flat hyperpriors).
            sk_reg = BayesianRidge(fit_intercept=False,
                                   compute_score=True,
                                   alpha_1=0,
                                   alpha_2=0,
                                   lambda_1=0,
                                   lambda_2=0)
            sk_reg.fit(thetas[0].cpu().detach().numpy(),
                       time_derivs[0].cpu().detach().numpy().ravel())
            model.s.data[:, 0] = torch.log(1 / MSE.data)
            model.s.data[:, 1] = torch.log(torch.tensor(sk_reg.lambda_))
            model.s.data[:, 2] = torch.log(torch.tensor(sk_reg.alpha_))

        tau = torch.exp(model.s[:, 0])  # noise precision of the network fit
        alpha = torch.exp(model.s[:, 1])  # prior precision on the coefficients
        beta = torch.exp(model.s[:, 2])  # noise precision of the regression

        M = Theta.shape[1]
        N = Theta.shape[0]

        # Posterior precision and mean of the coefficients
        A = torch.eye(M).to(Theta.device) * alpha + beta * Theta.T @ Theta
        mn = beta * torch.inverse(A) @ Theta.T @ t

        # Negative log marginal likelihood (evidence) of the regression;
        # log|A| needs logdet (an elementwise log only matches for diagonal A)
        loss_reg = -1 / 2 * (
            M * torch.log(alpha) + N * torch.log(beta) - beta *
            (t - Theta @ mn).T @ (t - Theta @ mn) - alpha * mn.T @ mn -
            torch.logdet(A) - N * np.log(2 * np.pi))

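        # Residual of the discovered equation per output (logged only)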
        Reg = torch.stack([
            torch.mean((dt - theta @ coeff_vector)**2)
            for dt, theta, coeff_vector in zip(
                time_derivs, thetas,
                model.constraint_coeffs(scaled=False, sparse=True))
        ])
        # Data term (tau = learned noise precision) plus the negative
        # log-evidence of the regression
        loss = torch.sum((N / 2 * tau * MSE - N / 2 * torch.log(tau)) +
                         loss_reg)

        # Optimizer step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if iteration % write_iterations == 0:
            # ================== Validation costs ================
            prediction_test, coordinates = model.func_approx(data_test)
            time_derivs_test, thetas_test = model.library(
                (prediction_test, coordinates))
            with torch.no_grad():
                MSE_test = torch.mean((prediction_test - target_test)**2,
                                      dim=0)  # loss per output
                Reg_test = torch.stack([
                    torch.mean((dt - theta @ coeff_vector)**2)
                    for dt, theta, coeff_vector in zip(
                        time_derivs_test, thetas_test,
                        model.constraint_coeffs(scaled=False, sparse=True))
                ])
                loss_test = torch.sum(MSE_test + Reg_test)

            # ====================== Logging =======================
            _ = model.sparse_estimator(
                thetas, time_derivs
            )  # calculating l1 adjusted coeffs but not setting mask
            estimator_coeff_vectors = model.estimator_coeffs()
            l1_norm = torch.sum(torch.abs(
                torch.cat(model.constraint_coeffs(sparse=True, scaled=True),
                          dim=1)),
                                dim=0)
            progress(iteration, start_time, max_iterations, loss.item(),
                     torch.sum(MSE).item(),
                     torch.sum(Reg).item(),
                     torch.sum(l1_norm).item())
            board.write(iteration,
                        loss,
                        MSE,
                        Reg,
                        l1_norm,
                        model.constraint_coeffs(sparse=True, scaled=True),
                        model.constraint_coeffs(sparse=True, scaled=False),
                        estimator_coeff_vectors,
                        MSE_test=MSE_test,
                        Reg_test=Reg_test,
                        loss_test=loss_test,
                        s=model.s)

            # ================== Sparsity update =============
            # Updating sparsity and/or convergence
            sparsity_scheduler(iteration, torch.sum(MSE_test), model,
                               optimizer)

            if sparsity_scheduler.apply_sparsity:
                with torch.no_grad():
                    model.constraint.sparsity_masks = model.sparse_estimator(
                        thetas, time_derivs)
                break
    board.close()
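
The loss_reg term above is the negative log marginal likelihood (evidence) of Bayesian linear regression with prior precision alpha and noise precision beta (Bishop, PRML, eq. 3.86). For reference, a self-contained NumPy sketch of the same quantity on synthetic data; all names are local to the sketch.

import numpy as np

rng = np.random.default_rng(0)
N, M = 200, 5
Theta = rng.standard_normal((N, M))
w_true = np.array([1.0, 0.0, -2.0, 0.0, 0.5])
t = Theta @ w_true + 0.1 * rng.standard_normal(N)

alpha, beta = 1.0, 100.0                        # fixed precisions
A = alpha * np.eye(M) + beta * Theta.T @ Theta  # posterior precision
mn = beta * np.linalg.solve(A, Theta.T @ t)     # posterior mean

# E(mn) = beta/2 ||t - Theta mn||^2 + alpha/2 ||mn||^2
E_mn = beta / 2 * np.sum((t - Theta @ mn) ** 2) + alpha / 2 * mn @ mn
log_evidence = (M / 2 * np.log(alpha) + N / 2 * np.log(beta) - E_mn
                - np.linalg.slogdet(A)[1] / 2 - N / 2 * np.log(2 * np.pi))
print(mn.round(2), log_evidence)                # loss_reg above is -log_evidence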
Example #3
def train_gradnorm(model: DeepMoD,
                   data: torch.Tensor,
                   target: torch.Tensor,
                   optimizer,
                   sparsity_scheduler,
                   alpha,
                   split: float = 0.8,
                   log_dir: Optional[str] = None,
                   max_iterations: int = 10000,
                   write_iterations: int = 25,
                   **convergence_kwargs) -> None:
    """Stops training when it reaches minimum MSE.

    Args:
        model (DeepMoD): [description]
        data (torch.Tensor): [description]
        target (torch.Tensor): [description]
        optimizer ([type]): [description]
        sparsity_scheduler ([type]): [description]
        log_dir (Optional[str], optional): [description]. Defaults to None.
        max_iterations (int, optional): [description]. Defaults to 10000.
    """
    start_time = time.time()
    board = Tensorboard(log_dir)  # initializing tb board

    # Splitting data, assumes data is already randomized
    n_train = int(split * data.shape[0])
    n_test = data.shape[0] - n_train
    data_train, data_test = torch.split(data, [n_train, n_test], dim=0)
    target_train, target_test = torch.split(target, [n_train, n_test], dim=0)

    # Training
    print(
        '| Iteration | Progress | Time remaining |     Loss |      MSE |      Reg |    L1 norm |'
    )
    for iteration in range(max_iterations + 1):
        # ================== Training Model ============================
        prediction, time_derivs, thetas = model(data_train)
        MSE = torch.mean((prediction - target_train)**2,
                         dim=0)  # loss per output
        Reg = torch.cat([
            torch.mean((dt - theta @ coeff_vector)**2, dim=0)
            for dt, theta, coeff_vector in zip(
                time_derivs, thetas,
                model.constraint_coeffs(scaled=False, sparse=True))
        ])
        # Per-task losses weighted by exp(model.weights) (log task weights)
        task_loss = (torch.exp(model.weights) * torch.stack(
            (MSE, Reg), dim=1)).flatten()
        loss = torch.sum(task_loss)

        if iteration == 0:  # reference losses for the training-rate ratio
            ini_loss = task_loss.data
        if torch.any(task_loss.data > ini_loss):
            # Keep each reference at the running maximum of its task loss
            ini_loss[task_loss.data > ini_loss] = task_loss.data[
                task_loss.data > ini_loss]

        # Backprop the weighted loss into the network parameters; the task
        # weights get their gradient from the GradNorm objective below
        optimizer.zero_grad()
        loss.backward(retain_graph=True)
        model.weights.grad.data.zero_()  # discard the task-weight gradients

        # Gradient norm of each weighted task loss w.r.t. the last shared
        # layer; torch.stack (rather than torch.tensor) keeps the graph from
        # create_graph=True, so G stays differentiable w.r.t. the weights
        G = torch.stack([
            torch.norm(
                torch.autograd.grad(loss_i,
                                    list(model.parameters())[-2],
                                    retain_graph=True,
                                    create_graph=True)[0], 2)
            for loss_i in task_loss
        ]).to(data.device)
        G_mean = torch.mean(G)

        # Relative losses / inverse training rates
        rel_loss = task_loss / ini_loss
        inv_train_rate = rel_loss / torch.mean(rel_loss)

        # GradNorm loss; the target gradient norm is treated as a constant
        grad_norm_loss = torch.sum(
            torch.abs(G - (G_mean * inv_train_rate**alpha).detach()))

        # Setting grads
        model.weights.grad = torch.autograd.grad(grad_norm_loss,
                                                 model.weights)[0]

        # do a step with the optimizer
        optimizer.step()

        # Renormalize so the effective weights sum to the number of tasks;
        # model.weights stores log-weights, so the sum runs over exp(weights)
        normalize_coeff = task_loss.shape[0] / torch.sum(
            torch.exp(model.weights.data))
        model.weights.data = torch.log(
            torch.exp(model.weights.data) * normalize_coeff)

        if iteration % write_iterations == 0:
            # ================== Validation costs ================
            prediction_test, coordinates = model.func_approx(data_test)
            time_derivs_test, thetas_test = model.library(
                (prediction_test, coordinates))
            with torch.no_grad():
                MSE_test = torch.mean((prediction_test - target_test)**2,
                                      dim=0)  # loss per output
                Reg_test = torch.stack([
                    torch.mean((dt - theta @ coeff_vector)**2)
                    for dt, theta, coeff_vector in zip(
                        time_derivs_test, thetas_test,
                        model.constraint_coeffs(scaled=False, sparse=True))
                ])
                # Validation loss weighted the same way as the training loss
                loss_test = torch.sum(
                    torch.exp(model.weights) *
                    torch.stack((MSE_test, Reg_test), dim=1))

            # ====================== Logging =======================
            _ = model.sparse_estimator(
                thetas, time_derivs
            )  # calculating l1 adjusted coeffs but not setting mask
            estimator_coeff_vectors = model.estimator_coeffs()
            l1_norm = torch.sum(torch.abs(
                torch.cat(model.constraint_coeffs(sparse=True, scaled=True),
                          dim=1)),
                                dim=0)
            progress(iteration, start_time, max_iterations, loss.item(),
                     torch.sum(MSE).item(),
                     torch.sum(Reg).item(),
                     torch.sum(l1_norm).item())
            board.write(iteration,
                        loss,
                        MSE,
                        Reg,
                        l1_norm,
                        model.constraint_coeffs(sparse=True, scaled=True),
                        model.constraint_coeffs(sparse=True, scaled=False),
                        estimator_coeff_vectors,
                        MSE_test=MSE_test,
                        Reg_test=Reg_test,
                        loss_test=loss_test,
                        w=model.weights)

            # ================== Sparsity update =============
            # Updating sparsity and/or convergence
            sparsity_scheduler(iteration, torch.sum(MSE_test), model,
                               optimizer)

            if sparsity_scheduler.apply_sparsity:
                with torch.no_grad():
                    model.constraint.sparsity_masks = model.sparse_estimator(
                        thetas, time_derivs)
                break
    board.close()
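
To see the GradNorm update (Chen et al., 2018) in isolation, here is a self-contained toy version of the same scheme on two scalar tasks sharing a single parameter; every name is local to the sketch.

import torch

shared = torch.tensor([1.0], requires_grad=True)  # stand-in for the last shared layer
log_w = torch.zeros(2, requires_grad=True)        # log task weights, as in model.weights
opt = torch.optim.Adam([shared, log_w], lr=1e-2)
alpha = 1.0

for step in range(200):
    task_loss = torch.stack(((shared - 2.0).pow(2).squeeze(),
                             10 * (shared + 1.0).pow(2).squeeze()))
    weighted = torch.exp(log_w) * task_loss
    loss = weighted.sum()
    if step == 0:
        ini_loss = task_loss.detach()

    opt.zero_grad()
    loss.backward(retain_graph=True)  # gradients for the shared parameter
    log_w.grad = None                 # task weights follow the GradNorm objective instead

    # Per-task gradient norms w.r.t. the shared parameter, kept differentiable
    G = torch.stack([torch.norm(torch.autograd.grad(w_l, shared,
                                                    retain_graph=True,
                                                    create_graph=True)[0])
                     for w_l in weighted])
    rel = task_loss.detach() / ini_loss
    target = (G.mean() * (rel / rel.mean()) ** alpha).detach()  # treated as a constant
    log_w.grad = torch.autograd.grad(torch.sum(torch.abs(G - target)), log_w)[0]
    opt.step()

    # Renormalize so the effective weights sum to the number of tasks
    with torch.no_grad():
        log_w += torch.log(2.0 / torch.exp(log_w).sum())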