Example #1
def cifar10_adam_train_fn(batch, lr, betas, activation):
    """Create training instance for CIFAR-10 Adam optimization.

    Parameters:
    -----------
    batch : int
        Batch size
    lr : float
        Learning rate for Adam
    betas : (float, float)
        Coefficients for computing running averages in Adam
    activation : str, 'relu' or 'sigmoid' or 'tanh'
        Activation function
    """
    # logging directory
    # -----------------
    # directory of run
    run_name = dirname_from_params(act=activation,
                                   opt="adam",
                                   batch=batch,
                                   lr=lr,
                                   b1=betas[0],
                                   b2=betas[1])
    logdir = path.join(data_dir, run_name)

    # training procedure
    # ------------------
    def training_fn():
        """Training function setting up the train instance."""
        act = activation_dict[activation]
        model = cifar10_c4d3(conv_activation=act, dense_activation=act)
        loss_function = CrossEntropyLoss()
        data_loader = CIFAR10Loader(train_batch_size=batch,
                                    test_batch_size=test_batch)
        optimizer = Adam(model.parameters(), lr=lr, betas=betas)
        # initialize training
        train = FirstOrderTraining(
            model,
            loss_function,
            optimizer,
            data_loader,
            logdir,
            epochs,
            logs_per_epoch=logs_per_epoch,
            device=device,
        )
        return train

    return training_fn
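
The factory fixes the hyperparameters in a closure and defers the expensive setup (model, data loader, optimizer) to the returned `training_fn`. A minimal usage sketch, assuming the module-level globals of the experiment script (`data_dir`, `test_batch`, `epochs`, `logs_per_epoch`, `device`, `activation_dict`) are defined, and that `run()` is the method that launches the training loop:

# Hypothetical usage; the globals above and the run() entry point are assumptions.
train_fn = cifar10_adam_train_fn(batch=128,
                                 lr=1e-3,
                                 betas=(0.9, 0.999),
                                 activation="relu")
train = train_fn()  # builds model, loss, data loader, optimizer
train.run()         # assumed entry point of FirstOrderTraining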
Example #2
def cifar10_sgd_train_fn(batch, lr, momentum, activation):
    """Create training instance for CIFAR-10 SGD optimization.

    Parameters:
    -----------
    batch : int
        Batch size
    lr : float
        Learning rate for SGD
    momentum : float
        Momentum for SGD
    activation : str, 'relu' or 'sigmoid' or 'tanh'
        Activation function
    """
    # logging directory
    # -----------------
    # directory of run
    run_name = dirname_from_params(act=activation,
                                   opt="sgd",
                                   batch=batch,
                                   lr=lr,
                                   mom=momentum)
    logdir = path.join(data_dir, run_name)

    # training procedure
    # ------------------
    def training_fn():
        """Training function setting up the train instance."""
        act = activation_dict[activation]
        model = cifar10_c4d3(conv_activation=act, dense_activation=act)
        loss_function = CrossEntropyLoss()
        data_loader = CIFAR10Loader(train_batch_size=batch,
                                    test_batch_size=test_batch)
        optimizer = SGD(model.parameters(), lr=lr, momentum=momentum)
        # initialize training
        train = FirstOrderTraining(
            model,
            loss_function,
            optimizer,
            data_loader,
            logdir,
            epochs,
            logs_per_epoch=logs_per_epoch,
            device=device,
        )
        return train

    return training_fn
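
Because each call captures its own hyperparameters, the factory composes naturally with a parameter grid. A sketch with purely illustrative values (the grid itself is not part of the original code):

from itertools import product

# Hypothetical grid; values are illustrative only.
batch_sizes = [128, 256]
lrs = [0.1, 0.01]
momenta = [0.0, 0.9]

train_fns = [
    cifar10_sgd_train_fn(b, lr, mom, activation="relu")
    for b, lr, mom in product(batch_sizes, lrs, momenta)
]
# Each closure sets up its own run when called, e.g. train_fns[0]().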
Example #3
def cifar10_sgd_train_fn():
    """Create training instance for CIFAR10 SGD experiment."""
    # hyperparameters
    # ---------------
    lr = 0.1
    momentum = 0.9

    # logging directory
    # -----------------
    # directory of run
    run_name = dirname_from_params(opt="sgd", batch=batch, lr=lr, mom=momentum)
    logdir = path.join(data_dir, run_name)

    # training procedure
    # ------------------
    def training_fn():
        """Training function setting up the train instance."""
        # setting up training and run
        model = cifar10_model()
        loss_function = CrossEntropyLoss()
        data_loader = CIFAR10Loader(train_batch_size=batch,
                                    test_batch_size=batch)
        optimizer = SGD(model.parameters(), lr=lr, momentum=momentum)
        # initialize training
        train = FirstOrderTraining(
            model,
            loss_function,
            optimizer,
            data_loader,
            logdir,
            epochs,
            logs_per_epoch=logs_per_epoch,
            device=device,
        )
        return train

    return training_fn
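
In this variant the hyperparameters are hard-coded, so the factory takes no arguments; calling it is cheap, and nothing heavy is allocated until the closure itself runs. A sketch of that deferral (the `run()` call is an assumption about the training class):

# Cheap: only composes the run name and the closure.
train_fn = cifar10_sgd_train_fn()
# Expensive: model and data loader are built only here.
train = train_fn()
train.run()  # assumed entry point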
Example #4
def cifar10_cgnewton_train_fn(
    batch, modify_2nd_order_terms, activation, lr, alpha, cg_maxiter, cg_tol, cg_atol
):
    """Create training instance for CIFAR10 CG experiment.

    Parameters:
    -----------
    batch : int
        Batch size
    modify_2nd_order_terms : str
        Strategy for treating 2nd-order effects of module functions:
        * `'zero'`: Yields the generalized Gauss-Newton matrix
        * `'abs'`: BDA-PCH approximation
        * `'clip'`: A variant of the BDA-PCH approximation
    activation : str, 'relu' or 'sigmoid' or 'tanh'
        Activation function
    lr : float
        Learning rate
    alpha : float, between 0 and 1
        Regularization strength in the HVP; see the paper by Chen et al. for details
    cg_maxiter : int
        Maximum number of iterations for CG
    cg_tol : float
        Relative tolerance for convergence of CG
    cg_atol : float
        Absolute tolerance for convergence of CG
    """
    # logging directory
    # -----------------
    # directory of run
    run_name = dirname_from_params(
        act=activation,
        opt="cgn",
        batch=batch,
        lr=lr,
        alpha=alpha,
        maxiter=cg_maxiter,
        tol=cg_tol,
        atol=cg_atol,
        mod2nd=modify_2nd_order_terms,
    )
    logdir = path.join(data_dir, run_name)

    # training procedure
    # ------------------
    def training_fn():
        """Training function setting up the train instance."""
        # set up training and run
        act = activation_dict[activation]
        model = cifar10_c4d3(conv_activation=act, dense_activation=act)
        model = convert_torch_to_cvp(model)
        loss_function = convert_torch_to_cvp(CrossEntropyLoss())
        data_loader = CIFAR10Loader(train_batch_size=batch, test_batch_size=test_batch)
        optimizer = CGNewton(
            model.parameters(),
            lr=lr,
            alpha=alpha,
            cg_atol=cg_atol,
            cg_tol=cg_tol,
            cg_maxiter=cg_maxiter,
        )
        # initialize training
        train = CVPSecondOrderTraining(
            model,
            loss_function,
            optimizer,
            data_loader,
            logdir,
            epochs,
            modify_2nd_order_terms,
            logs_per_epoch=logs_per_epoch,
            device=device,
        )
        return train

    return training_fn
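
A sketch of how this factory might be invoked. The values below reuse the CG settings that appear hard-coded in the last example of this section; here they are illustrative, not prescribed by this function:

# Hypothetical call; hyperparameter values are illustrative.
train_fn = cifar10_cgnewton_train_fn(
    batch=128,
    modify_2nd_order_terms="abs",  # BDA-PCH curvature approximation
    activation="sigmoid",
    lr=0.1,
    alpha=0.02,      # HVP regularization strength
    cg_maxiter=50,   # cap on CG iterations per parameter update
    cg_tol=0.1,      # relative CG convergence tolerance
    cg_atol=0.0,     # absolute CG convergence tolerance
)
train = train_fn()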
Example #5
def cifar10_kfac_train_fn(
    batch,
    activation,
    lr,
    momentum,
    stat_decay,
    damping,
    kl_clip,
    weight_decay,
    TCov,
    TInv,
    batch_averaged,
):
    """Create training instance for CIFAR10 KFAC experiment."""
    # logging directory
    # -----------------
    # directory of run
    run_name = dirname_from_params(
        act=activation,
        opt="kfac",
        batch=batch,
        lr=lr,
        mom=momentum,
        stat_dec=stat_decay,
        damp=damping,
        weight_dec=weight_decay,
        kl_clip=kl_clip,
        TCov=TCov,
        TInv=TInv,
        batch_avg=batch_averaged,
    )
    logdir = path.join(data_dir, run_name)

    # training procedure
    # ------------------
    def training_fn():
        """Training function setting up the train instance."""
        # set up training and run
        act = activation_dict[activation]
        model = cifar10_c4d3(conv_activation=act, dense_activation=act)
        loss_function = CrossEntropyLoss()
        data_loader = CIFAR10Loader(train_batch_size=batch,
                                    test_batch_size=test_batch)
        optimizer = KFACOptimizer(
            model=model,
            lr=lr,
            momentum=momentum,
            stat_decay=stat_decay,
            damping=damping,
            kl_clip=kl_clip,
            weight_decay=weight_decay,
            TCov=TCov,
            TInv=TInv,
            batch_averaged=batch_averaged,
        )
        # initialize training (no second backward pass needed)
        train = KFACTraining(
            model,
            loss_function,
            optimizer,
            data_loader,
            logdir,
            epochs,
            logs_per_epoch=logs_per_epoch,
            device=device,
        )
        return train

    return training_fn
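
A sketch with one plausible set of KFAC hyperparameters; all values are illustrative assumptions, not settings taken from the original experiment:

# Hypothetical call; hyperparameter values are illustrative only.
train_fn = cifar10_kfac_train_fn(
    batch=128,
    activation="relu",
    lr=0.01,
    momentum=0.9,
    stat_decay=0.95,      # running-average decay of the Kronecker factors
    damping=1e-3,         # damping before inversion
    kl_clip=1e-3,         # bound on the step size via KL clipping
    weight_decay=0.0,
    TCov=10,              # update covariance statistics every 10 steps
    TInv=100,             # recompute inverses every 100 steps
    batch_averaged=True,  # loss is averaged over the batch
)
train = train_fn()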
Example #6
def cifar10_cgnewton_train_fn(modify_2nd_order_terms):
    """Create training instance for CIFAR10 CG experiment

    Parameters:
    -----------
    modify_2nd_order_terms : str
        Strategy for treating 2nd-order effects of module functions:
        * `'zero'`: Yields the generalized Gauss-Newton matrix
        * `'abs'`: BDA-PCH approximation
        * `'clip'`: A variant of the BDA-PCH approximation
    """
    # hyperparameters
    # ---------------
    lr = 0.1
    alpha = 0.02
    cg_maxiter = 50
    cg_tol = 0.1
    cg_atol = 0

    # logging directory
    # -----------------
    # directory of run
    run_name = dirname_from_params(
        opt="cgn",
        batch=batch,
        lr=lr,
        alpha=alpha,
        maxiter=cg_maxiter,
        tol=cg_tol,
        atol=cg_atol,
        mod2nd=modify_2nd_order_terms,
    )
    logdir = path.join(data_dir, run_name)

    # training procedure
    # ------------------
    def training_fn():
        """Training function setting up the train instance."""
        # set up training and run
        model = hbp_cifar10_model(average_input_jacobian=True,
                                  average_parameter_jacobian=True)
        loss_function = HBPCrossEntropyLoss()
        data_loader = CIFAR10Loader(train_batch_size=batch,
                                    test_batch_size=batch)
        optimizer = CGNewton(
            model.parameters(),
            lr=lr,
            alpha=alpha,
            cg_atol=cg_atol,
            cg_tol=cg_tol,
            cg_maxiter=cg_maxiter,
        )
        # initialize training
        train = HBPSecondOrderTraining(
            model,
            loss_function,
            optimizer,
            data_loader,
            logdir,
            epochs,
            modify_2nd_order_terms,
            logs_per_epoch=logs_per_epoch,
            device=device,
        )
        return train

    return training_fn
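
Since the curvature treatment is the only free parameter here, a small driver loop covers all three documented strategies. A sketch, again assuming the experiment globals and a `run()` entry point:

# Hypothetical driver loop over the documented strategies.
for mod2nd in ["zero", "abs", "clip"]:
    train = cifar10_cgnewton_train_fn(mod2nd)()
    train.run()  # assumed entry point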