def cifar10_adam_train_fn(batch, lr, betas, activation):
    """Create training instance for CIFAR-10 Adam optimization.

    Parameters
    ----------
    batch : int
        Batch size
    lr : float
        Learning rate for Adam
    betas : (float, float)
        Coefficients for computing running averages in Adam
    activation : str, 'relu' or 'sigmoid' or 'tanh'
        Activation function
    """
    # logging directory
    # -----------------
    # directory of run
    run_name = dirname_from_params(
        act=activation, opt="adam", batch=batch, lr=lr, b1=betas[0], b2=betas[1]
    )
    logdir = path.join(data_dir, run_name)

    # training procedure
    # ------------------
    def training_fn():
        """Training function setting up the train instance."""
        act = activation_dict[activation]
        model = cifar10_c4d3(conv_activation=act, dense_activation=act)
        loss_function = CrossEntropyLoss()
        data_loader = CIFAR10Loader(
            train_batch_size=batch, test_batch_size=test_batch
        )
        optimizer = Adam(model.parameters(), lr=lr, betas=betas)
        # initialize training
        train = FirstOrderTraining(
            model,
            loss_function,
            optimizer,
            data_loader,
            logdir,
            epochs,
            logs_per_epoch=logs_per_epoch,
            device=device,
        )
        return train

    return training_fn
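
# Illustrative usage (a sketch, not part of the original scripts): the factory
# binds hyperparameters and the logging directory, while the returned
# ``training_fn`` builds the train instance lazily. The hyperparameter values
# below and the ``run()`` call on the train instance are assumptions about the
# surrounding experiment runner.
#
#     train_fn = cifar10_adam_train_fn(
#         batch=128, lr=1e-3, betas=(0.9, 0.999), activation="relu"
#     )
#     train = train_fn()  # constructs model, data loader, and optimizer
#     train.run()         # hypothetical runner method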

def cifar10_sgd_train_fn(batch, lr, momentum, activation):
    """Create training instance for CIFAR-10 SGD optimization.

    Parameters
    ----------
    batch : int
        Batch size
    lr : float
        Learning rate for SGD
    momentum : float
        Momentum for SGD
    activation : str, 'relu' or 'sigmoid' or 'tanh'
        Activation function
    """
    # logging directory
    # -----------------
    # directory of run
    run_name = dirname_from_params(
        act=activation, opt="sgd", batch=batch, lr=lr, mom=momentum
    )
    logdir = path.join(data_dir, run_name)

    # training procedure
    # ------------------
    def training_fn():
        """Training function setting up the train instance."""
        act = activation_dict[activation]
        model = cifar10_c4d3(conv_activation=act, dense_activation=act)
        loss_function = CrossEntropyLoss()
        data_loader = CIFAR10Loader(
            train_batch_size=batch, test_batch_size=test_batch
        )
        optimizer = SGD(model.parameters(), lr=lr, momentum=momentum)
        # initialize training
        train = FirstOrderTraining(
            model,
            loss_function,
            optimizer,
            data_loader,
            logdir,
            epochs,
            logs_per_epoch=logs_per_epoch,
            device=device,
        )
        return train

    return training_fn

def cifar10_sgd_train_fn():
    """Create training instance for CIFAR10 SGD experiment."""
    # hyperparameters
    # ---------------
    lr = 0.1
    momentum = 0.9

    # logging directory
    # -----------------
    # directory of run
    run_name = dirname_from_params(opt="sgd", batch=batch, lr=lr, mom=momentum)
    logdir = path.join(data_dir, run_name)

    # training procedure
    # ------------------
    def training_fn():
        """Training function setting up the train instance."""
        # setting up training and run
        model = cifar10_model()
        loss_function = CrossEntropyLoss()
        data_loader = CIFAR10Loader(train_batch_size=batch, test_batch_size=batch)
        optimizer = SGD(model.parameters(), lr=lr, momentum=momentum)
        # initialize training
        train = FirstOrderTraining(
            model,
            loss_function,
            optimizer,
            data_loader,
            logdir,
            epochs,
            logs_per_epoch=logs_per_epoch,
            device=device,
        )
        return train

    return training_fn

def cifar10_cgnewton_train_fn(
    batch, modify_2nd_order_terms, activation, lr, alpha, cg_maxiter, cg_tol, cg_atol
):
    """Create training instance for CIFAR10 CG experiment.

    Parameters
    ----------
    batch : int
        Batch size
    modify_2nd_order_terms : str
        Strategy for treating 2nd-order effects of module functions:

        * `'zero'`: Yields the Generalized Gauss-Newton matrix
        * `'abs'`: BDA-PCH approximation
        * `'clip'`: Different BDA-PCH approximation
    activation : str, 'relu' or 'sigmoid' or 'tanh'
        Activation function
    lr : float
        Learning rate
    alpha : float, between 0 and 1
        Regularization in HVP, see Chen paper for more details
    cg_maxiter : int
        Maximum number of iterations for CG
    cg_tol : float
        Relative tolerance for convergence of CG
    cg_atol : float
        Absolute tolerance for convergence of CG
    """
    # logging directory
    # -----------------
    # directory of run
    run_name = dirname_from_params(
        act=activation,
        opt="cgn",
        batch=batch,
        lr=lr,
        alpha=alpha,
        maxiter=cg_maxiter,
        tol=cg_tol,
        atol=cg_atol,
        mod2nd=modify_2nd_order_terms,
    )
    logdir = path.join(data_dir, run_name)

    # training procedure
    # ------------------
    def training_fn():
        """Training function setting up the train instance."""
        # set up training and run
        act = activation_dict[activation]
        model = cifar10_c4d3(conv_activation=act, dense_activation=act)
        model = convert_torch_to_cvp(model)
        loss_function = convert_torch_to_cvp(CrossEntropyLoss())
        data_loader = CIFAR10Loader(
            train_batch_size=batch, test_batch_size=test_batch
        )
        optimizer = CGNewton(
            model.parameters(),
            lr=lr,
            alpha=alpha,
            cg_atol=cg_atol,
            cg_tol=cg_tol,
            cg_maxiter=cg_maxiter,
        )
        # initialize training
        train = CVPSecondOrderTraining(
            model,
            loss_function,
            optimizer,
            data_loader,
            logdir,
            epochs,
            modify_2nd_order_terms,
            logs_per_epoch=logs_per_epoch,
            device=device,
        )
        return train

    return training_fn
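
# Illustrative usage (a sketch): each value of ``modify_2nd_order_terms``
# selects a different curvature approximation for the same CG-Newton setup,
# so a small loop compares them directly. The hyperparameter values here are
# hypothetical, not taken from the original experiments.
#
#     for mod2nd in ["zero", "abs", "clip"]:
#         train_fn = cifar10_cgnewton_train_fn(
#             batch=128,
#             modify_2nd_order_terms=mod2nd,
#             activation="sigmoid",
#             lr=0.1,
#             alpha=0.02,
#             cg_maxiter=50,
#             cg_tol=0.1,
#             cg_atol=0.0,
#         )
#         train = train_fn()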

def cifar10_kfac_train_fn(
    batch,
    activation,
    lr,
    momentum,
    stat_decay,
    damping,
    kl_clip,
    weight_decay,
    TCov,
    TInv,
    batch_averaged,
):
    """Create training instance for CIFAR10 KFAC experiment.

    Parameters
    ----------
    batch : int
        Batch size
    activation : str, 'relu' or 'sigmoid' or 'tanh'
        Activation function
    lr : float
        Learning rate
    momentum : float
        Momentum
    stat_decay : float
        Decay rate of the running averages of the covariance statistics
    damping : float
        Damping added to the curvature blocks before inversion
    kl_clip : float
        Threshold for the KL-based rescaling of the update
    weight_decay : float
        Weight decay coefficient
    TCov : int
        Update period (in steps) of the covariance statistics
    TInv : int
        Update period (in steps) of the block inverses
    batch_averaged : bool
        Whether the loss is averaged over the batch
    """
    # logging directory
    # -----------------
    # directory of run
    run_name = dirname_from_params(
        act=activation,
        opt="kfac",
        batch=batch,
        lr=lr,
        mom=momentum,
        stat_dec=stat_decay,
        damp=damping,
        weight_dec=weight_decay,
        kl_clip=kl_clip,
        TCov=TCov,
        TInv=TInv,
        batch_avg=batch_averaged,
    )
    logdir = path.join(data_dir, run_name)

    # training procedure
    # ------------------
    def training_fn():
        """Training function setting up the train instance."""
        # set up training and run
        act = activation_dict[activation]
        model = cifar10_c4d3(conv_activation=act, dense_activation=act)
        loss_function = CrossEntropyLoss()
        data_loader = CIFAR10Loader(
            train_batch_size=batch, test_batch_size=test_batch
        )
        optimizer = KFACOptimizer(
            model=model,
            lr=lr,
            momentum=momentum,
            stat_decay=stat_decay,
            damping=damping,
            kl_clip=kl_clip,
            weight_decay=weight_decay,
            TCov=TCov,
            TInv=TInv,
            batch_averaged=batch_averaged,
        )
        # initialize training (no second backward pass needed)
        train = KFACTraining(
            model,
            loss_function,
            optimizer,
            data_loader,
            logdir,
            epochs,
            logs_per_epoch=logs_per_epoch,
            device=device,
        )
        return train

    return training_fn
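
# Illustrative usage (a sketch): the values below mirror common KFAC defaults
# but are assumptions, not settings from the original experiments.
#
#     train_fn = cifar10_kfac_train_fn(
#         batch=128,
#         activation="relu",
#         lr=0.01,
#         momentum=0.9,
#         stat_decay=0.95,
#         damping=1e-3,
#         kl_clip=1e-3,
#         weight_decay=0.0,
#         TCov=10,
#         TInv=100,
#         batch_averaged=True,
#     )
#     train = train_fn()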

def cifar10_cgnewton_train_fn(modify_2nd_order_terms):
    """Create training instance for CIFAR10 CG experiment.

    Parameters
    ----------
    modify_2nd_order_terms : str
        Strategy for treating 2nd-order effects of module functions:

        * `'zero'`: Yields the Generalized Gauss-Newton matrix
        * `'abs'`: BDA-PCH approximation
        * `'clip'`: Different BDA-PCH approximation
    """
    # hyperparameters
    # ---------------
    lr = 0.1
    alpha = 0.02
    cg_maxiter = 50
    cg_tol = 0.1
    cg_atol = 0

    # logging directory
    # -----------------
    # directory of run
    run_name = dirname_from_params(
        opt="cgn",
        batch=batch,
        lr=lr,
        alpha=alpha,
        maxiter=cg_maxiter,
        tol=cg_tol,
        atol=cg_atol,
        mod2nd=modify_2nd_order_terms,
    )
    logdir = path.join(data_dir, run_name)

    # training procedure
    # ------------------
    def training_fn():
        """Training function setting up the train instance."""
        # set up training and run
        model = hbp_cifar10_model(
            average_input_jacobian=True, average_parameter_jacobian=True
        )
        loss_function = HBPCrossEntropyLoss()
        data_loader = CIFAR10Loader(train_batch_size=batch, test_batch_size=batch)
        optimizer = CGNewton(
            model.parameters(),
            lr=lr,
            alpha=alpha,
            cg_atol=cg_atol,
            cg_tol=cg_tol,
            cg_maxiter=cg_maxiter,
        )
        # initialize training
        train = HBPSecondOrderTraining(
            model,
            loss_function,
            optimizer,
            data_loader,
            logdir,
            epochs,
            modify_2nd_order_terms,
            logs_per_epoch=logs_per_epoch,
            device=device,
        )
        return train

    return training_fn