def run_gpflow(
        dset: Dataset,
        algorithm: Algorithm,
        dtype: Optional[DataType],
        batch_size: int,
        lr: float,
        natgrad_lr: float,
        var_dist: str,
        num_iter: int,
        num_centers: int,
        kernel_sigma: float,
        learn_ind_pts: bool,
        error_every: int,
        kernel_variance: float,
        kfold: int,
        seed: int,
        ind_pt_file: Optional[str] = None,
):
    import tensorflow as tf
    import gpflow
    from gpflow_model import TrainableSVGP

    tf.random.set_seed(seed)
    np.random.seed(seed)

    # Data types
    if dtype is None:
        dtype = DataType.float32
    if dtype == DataType.float32:
        gpflow.config.set_default_float(np.float32)

    # Error metrics
    err_fns = get_err_fns(dset)

    # Kernel
    sigma_initial = np.array(kernel_sigma, dtype=dtype.to_numpy_dtype())
    kernel = gpflow.kernels.SquaredExponential(lengthscales=sigma_initial,
                                               variance=kernel_variance)

    def get_model(Xtr, num_outputs, err_fn):
        # Inducing points: sample from the training set unless a file is given.
        if ind_pt_file is None or not os.path.isfile(ind_pt_file):
            inducing_idx = np.random.choice(Xtr.shape[0], num_centers,
                                            replace=False)
            inducing_points = Xtr[inducing_idx].reshape(num_centers, -1)
            print("Selected %d random inducing points" %
                  (inducing_points.shape[0]))
        else:
            inducing_points = np.load(ind_pt_file).astype(
                dtype.to_numpy_dtype())
            print("Loaded %d inducing points from %s" %
                  (inducing_points.shape[0], ind_pt_file))
        # For classification, TrainableSVGP needs the number of classes.
        num_classes = 0
        if algorithm == Algorithm.GPFLOW_CLS:
            if num_outputs == 1:
                num_classes = 2
            else:
                num_classes = num_outputs
        model = TrainableSVGP(
            kernel=kernel,
            inducing_points=inducing_points,
            batch_size=batch_size,
            num_iter=num_iter,
            err_fn=err_fn,
            classif=num_classes,
            lr=lr,
            var_dist=var_dist,
            error_every=error_every,
            train_hyperparams=learn_ind_pts,
            natgrad_lr=natgrad_lr,
        )
        return model

    if kfold == 1:
        load_fn = get_load_fn(dset)
        Xtr, Ytr, Xts, Yts, kwargs = load_fn(dtype=dtype.to_numpy_dtype(),
                                             as_torch=False, as_tf=True)
        err_fns = [functools.partial(fn, **kwargs) for fn in err_fns]
        model = get_model(Xtr, Ytr.shape[1], err_fns[0])
        t_s = time.time()
        print("Starting to train model %s on data %s" % (model, dset),
              flush=True)
        model.fit(Xtr, Ytr, Xts, Yts)
        print("Training of %s on %s complete in %.2fs" %
              (algorithm, dset, time.time() - t_s), flush=True)
        if model.num_classes == 2:
            # Map labels from {-1, 1} to {0, 1} for the error functions.
            Yts = (Yts + 1) / 2
            Ytr = (Ytr + 1) / 2
        test_model(model, f"{algorithm} on {dset}", Xts, Yts, Xtr, Ytr, err_fns)
        #if ind_pt_file is not None:
        #    print("Inducing points: ", model.inducing_points[0])
        #    np.save(ind_pt_file, model.inducing_points)
        #    print("Saved inducing points to %s" % (ind_pt_file))
    else:
        print("Will train GPflow on data %s with %d-fold CV" % (dset, kfold),
              flush=True)
        load_fn = get_cv_fn(dset)
        iteration = 0
        test_errs, train_errs = [], []
        # Load folds as TensorFlow-compatible arrays, matching the
        # single-split path above.
        for Xtr, Ytr, Xts, Yts, kwargs in load_fn(
                k=kfold, dtype=dtype.to_numpy_dtype(), as_torch=False,
                as_tf=True):
            err_fns = [functools.partial(fn, **kwargs) for fn in err_fns]
            model = get_model(Xtr, Ytr.shape[1], err_fns[0])
            t_s = time.time()
            model.fit(Xtr, Ytr, Xts, Yts)
            print("Training of %s on %s complete in %.2fs" %
                  (algorithm, dset, time.time() - t_s), flush=True)
            iteration += 1
            c_test_errs, c_train_errs = test_model(
                model, f"{algorithm} on {dset}", Xts, Yts, Xtr, Ytr, err_fns)
            train_errs.append(c_train_errs)
            test_errs.append(c_test_errs)

        print("Full errors: Test %s - Train %s" % (test_errs, train_errs))
        print()
        print("%d-Fold Error Report" % (kfold))
        for err_fn_i in range(len(err_fns)):
            print("Final test errors: %.4f +- %.4f" %
                  (np.mean([e[err_fn_i] for e in test_errs]),
                   np.std([e[err_fn_i] for e in test_errs])))
            print("Final train errors: %.4f +- %.4f" %
                  (np.mean([e[err_fn_i] for e in train_errs]),
                   np.std([e[err_fn_i] for e in train_errs])))
        print()
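

# The per-fold error report above is repeated verbatim in each run_* function
# below. A minimal sketch of a shared helper that could replace those blocks;
# the name `print_kfold_report` is hypothetical, not part of this script.
def print_kfold_report(kfold, test_errs, train_errs, num_err_fns):
    """Print mean +- std of the per-fold errors for each error metric."""
    print("%d-Fold Error Report" % (kfold))
    for i in range(num_err_fns):
        print("Final test errors: %.4f +- %.4f" %
              (np.mean([e[i] for e in test_errs]),
               np.std([e[i] for e in test_errs])))
        print("Final train errors: %.4f +- %.4f" %
              (np.mean([e[i] for e in train_errs]),
               np.std([e[i] for e in train_errs])))
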
def run_falkon(dset: Dataset, algorithm: Algorithm, dtype: Optional[DataType],
               num_iter: int, num_centers: int, kernel_sigma: float,
               penalty: float, kernel: str, kfold: int, seed: int):
    import torch
    from falkon import kernels
    from falkon.models import falkon
    from falkon.utils import TicToc

    torch.manual_seed(seed)
    np.random.seed(seed)

    # Data types
    if dtype is None:
        dtype = DataType.float64

    # Kernel
    if kernel.lower() == 'gaussian':
        k = kernels.GaussianKernel(kernel_sigma)
    elif kernel.lower() == 'laplacian':
        k = kernels.LaplacianKernel(kernel_sigma)
    elif kernel.lower() == 'linear':
        k = kernels.LinearKernel(beta=1.0, sigma=kernel_sigma)
    else:
        raise ValueError("Kernel %s not understood for algorithm %s" %
                         (kernel, algorithm))

    opt = falkon.FalkonOptions(compute_arch_speed=False, no_single_kernel=True,
                               pc_epsilon_32=1e-6, pc_epsilon_64=1e-13,
                               debug=True)
    flk = falkon.Falkon(kernel=k, penalty=penalty, M=num_centers,
                        maxiter=num_iter, seed=seed, error_fn=None,
                        error_every=1, options=opt)

    # Error metrics
    err_fns = get_err_fns(dset)

    if kfold == 1:
        # Load data
        load_fn = get_load_fn(dset)
        Xtr, Ytr, Xts, Yts, kwargs = load_fn(dtype=dtype.to_numpy_dtype(),
                                             as_torch=True)
        # Pin training data to page-locked memory for faster
        # host-to-device copies.
        Xtr = Xtr.pin_memory()
        Ytr = Ytr.pin_memory()
        # Initialize the CUDA context now, so its cost is not charged
        # to the timed section.
        temp_test = torch.empty(3, 3).cuda()
        del temp_test
        err_fns = [functools.partial(fn, **kwargs) for fn in err_fns]
        with TicToc("FALKON ALGORITHM"):
            flk.error_fn = err_fns[0]
            print("Starting to train model %s on data %s" % (flk, dset),
                  flush=True)
            flk.fit(Xtr, Ytr, Xts, Yts)
        test_model(flk, f"{algorithm} on {dset}", Xts, Yts, Xtr, Ytr, err_fns)
    else:
        print("Will train model %s on data %s with %d-fold CV" %
              (flk, dset, kfold), flush=True)
        load_fn = get_cv_fn(dset)
        iteration = 0
        test_errs, train_errs = [], []
        for Xtr, Ytr, Xts, Yts, kwargs in load_fn(
                k=kfold, dtype=dtype.to_numpy_dtype(), as_torch=True):
            err_fns = [functools.partial(fn, **kwargs) for fn in err_fns]
            with TicToc("FALKON ALGORITHM (fold %d)" % (iteration)):
                flk.error_fn = err_fns[0]
                flk.fit(Xtr, Ytr, Xts, Yts)
            iteration += 1
            c_test_errs, c_train_errs = test_model(
                flk, f"{algorithm} on {dset}", Xts, Yts, Xtr, Ytr, err_fns)
            train_errs.append(c_train_errs)
            test_errs.append(c_test_errs)

        print("Full errors: Test %s - Train %s" % (test_errs, train_errs))
        print()
        print("%d-Fold Error Report" % (kfold))
        for err_fn_i in range(len(err_fns)):
            print("Final test errors: %.4f +- %.4f" %
                  (np.mean([e[err_fn_i] for e in test_errs]),
                   np.std([e[err_fn_i] for e in test_errs])))
            print("Final train errors: %.4f +- %.4f" %
                  (np.mean([e[err_fn_i] for e in train_errs]),
                   np.std([e[err_fn_i] for e in train_errs])))
        print()
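

# run_falkon pre-pins the data and touches the GPU before entering the timed
# TicToc block, so CUDA context creation is not charged to the solver. A
# minimal standalone sketch of the same timing pattern (`time_on_gpu` is a
# hypothetical helper, and assumes a CUDA-capable device is available):
def time_on_gpu(fn, *args):
    import torch
    torch.empty(1).cuda()      # warm-up: pay the CUDA context-creation cost here
    torch.cuda.synchronize()   # ensure setup work has finished
    t_s = time.time()
    out = fn(*args)
    torch.cuda.synchronize()   # wait for queued kernels before stopping the clock
    return out, time.time() - t_s
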
def run_epro(dset: Dataset, algorithm: Algorithm, dtype: Optional[DataType],
             num_iter: int, kernel_sigma: float, n_subsample: Optional[int],
             data_subsample: Optional[int], q: Optional[int], kfold: int,
             eta_divisor: int, seed: int):
    sys.path.append(EPRO_DIRECTORY)
    import tensorflow as tf
    from eigenpro import EigenPro
    import kernels

    tf.set_random_seed(seed)
    np.random.seed(seed)

    # Data types
    if dtype is None:
        dtype = DataType.float32
    if dtype.to_numpy_dtype() != np.float32:
        raise RuntimeError("EigenPro can only run on single-precision floats.")

    # Error metrics
    err_fns = get_err_fns(dset)
    tf_err_fn = get_tf_err_fn(dset)

    # Create kernel
    kernel = functools.partial(kernels.Gaussian, s=kernel_sigma)

    # Additional fixed params
    mem_gb = 11

    print("Starting EigenPro solver with %s subsamples, %s-top eigensystem, "
          "%f eta-divisor" % (n_subsample, q, eta_divisor))
    print("Random seed: %d" % (seed))

    if kfold == 1:
        # Load data
        load_fn = get_load_fn(dset)
        Xtr, Ytr, Xts, Yts, kwargs = load_fn(dtype=dtype.to_numpy_dtype(),
                                             as_torch=False)
        if data_subsample is not None:
            Xtr = Xtr[:data_subsample]
            Ytr = Ytr[:data_subsample]
            print("SUBSAMPLED INPUT DATA TO %d TRAINING SAMPLES" %
                  (Xtr.shape[0]), flush=True)
        err_fns = [functools.partial(fn, **kwargs) for fn in err_fns]
        tf_err_fn = functools.partial(tf_err_fn, **kwargs)
        tf_err_fn.__name__ = "tf_error"
        model = EigenPro(kernel, Xtr, n_label=Ytr.shape[1], mem_gb=mem_gb,
                         n_subsample=n_subsample, q=q, bs=None,
                         metric=tf_err_fn, seed=seed, eta_divisor=eta_divisor)
        print("Starting to train model %s on data %s" % (model, dset),
              flush=True)
        t_s = time.time()
        # Evaluate after each epoch 1 .. num_iter - 1.
        model.fit(Xtr, Ytr, x_val=Xts, y_val=Yts,
                  epochs=np.arange(num_iter - 1) + 1)
        print("Training of algorithm %s on %s done in %.2fs" %
              (algorithm, dset, time.time() - t_s), flush=True)
        test_model(model, f"{algorithm} on {dset}", Xts, Yts, Xtr, Ytr, err_fns)
    else:
        print("Will train EigenPro model on data %s with %d-fold CV" %
              (dset, kfold), flush=True)
        load_fn = get_cv_fn(dset)
        iteration = 0
        test_errs, train_errs = [], []
        for Xtr, Ytr, Xts, Yts, kwargs in load_fn(
                k=kfold, dtype=dtype.to_numpy_dtype(), as_torch=False):
            err_fns = [functools.partial(fn, **kwargs) for fn in err_fns]
            tf_err_fn = functools.partial(tf_err_fn, **kwargs)
            tf_err_fn.__name__ = "tf_error"
            model = EigenPro(kernel, Xtr, n_label=Ytr.shape[1], mem_gb=mem_gb,
                             n_subsample=n_subsample, q=q, bs=None,
                             metric=tf_err_fn, seed=seed,
                             eta_divisor=eta_divisor)
            print("Starting EPRO fit (fold %d)" % (iteration))
            model.fit(Xtr, Ytr, x_val=Xts, y_val=Yts,
                      epochs=np.arange(num_iter - 1) + 1)
            iteration += 1
            c_test_errs, c_train_errs = test_model(
                model, f"{algorithm} on {dset}", Xts, Yts, Xtr, Ytr, err_fns)
            train_errs.append(c_train_errs)
            test_errs.append(c_test_errs)

        print("Full errors: Test %s - Train %s" % (test_errs, train_errs))
        print()
        print("%d-Fold Error Report" % (kfold))
        for err_fn_i in range(len(err_fns)):
            print("Final test errors: %.4f +- %.4f" %
                  (np.mean([e[err_fn_i] for e in test_errs]),
                   np.std([e[err_fn_i] for e in test_errs])))
            print("Final train errors: %.4f +- %.4f" %
                  (np.mean([e[err_fn_i] for e in train_errs]),
                   np.std([e[err_fn_i] for e in train_errs])))
        print()
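

# In run_epro, `epochs=np.arange(num_iter - 1) + 1` makes EigenPro report
# validation error after every epoch 1, 2, ..., num_iter - 1. For example,
# with num_iter = 5:
#   >>> np.arange(5 - 1) + 1
#   array([1, 2, 3, 4])
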
def run_gpytorch(
        dset: Dataset,
        algorithm: Algorithm,
        dtype: Optional[DataType],
        batch_size: int,
        lr: float,
        natgrad_lr: float,
        num_iter: int,
        num_centers: int,
        kernel_sigma: float,
        var_dist: str,
        learn_ind_pts: bool,
        kfold: int,
        seed: int,
        ind_pt_file: Optional[str] = None,
):
    import torch
    import gpytorch
    from gpytorch_variational_models import TwoClassVGP, RegressionVGP, MultiClassVGP

    torch.manual_seed(seed)
    np.random.seed(seed)

    # Data types
    if dtype is None:
        dtype = DataType.float32
    if dtype.to_numpy_dtype() != np.float32:
        raise RuntimeError(
            f"{algorithm} can only run on single-precision floats.")

    # Error metrics
    err_fns = get_err_fns(dset)

    def get_model(Xtr, num_outputs, err_fn):
        num_samples = Xtr.shape[0]
        # Inducing points: sample from the training set unless a file is given.
        if ind_pt_file is None or not os.path.isfile(ind_pt_file):
            inducing_idx = np.random.choice(num_samples, num_centers,
                                            replace=False)
            inducing_points = Xtr[inducing_idx].reshape(num_centers, -1)
            print("Selected %d random inducing points" %
                  (inducing_points.shape[0]))
        else:
            inducing_points = torch.from_numpy(
                np.load(ind_pt_file).astype(dtype.to_numpy_dtype()))
            print("Loaded %d inducing points from %s" %
                  (inducing_points.shape[0], ind_pt_file))
        # Determine num devices
        n_devices = torch.cuda.device_count()
        output_device = torch.device('cuda:0')
        # Kernel
        if num_outputs == 1:
            # Kernel has a single length-scale (no ARD).
            kernel = gpytorch.kernels.ScaleKernel(
                gpytorch.kernels.RBFKernel(ard_num_dims=None))
            kernel.base_kernel.lengthscale = kernel_sigma
            #kernel = gpytorch.kernels.keops.RBFKernel(ard_num_dims=None)
            #kernel.lengthscale = kernel_sigma
        else:
            kernel = gpytorch.kernels.ScaleKernel(
                gpytorch.kernels.RBFKernel(
                    ard_num_dims=None,
                    batch_shape=torch.Size([num_outputs])))
            #kernel = gpytorch.kernels.keops.RBFKernel(ard_num_dims=None, batch_shape=torch.Size([num_outputs]))

        if algorithm == Algorithm.GPYTORCH_CLS:
            if num_outputs == 1:
                # 2 classes
                model = TwoClassVGP(
                    inducing_points,
                    kernel,
                    var_dist=var_dist,
                    err_fn=err_fn,
                    mb_size=batch_size,
                    num_data=num_samples,
                    num_epochs=num_iter,
                    use_cuda=True,
                    lr=lr,
                    natgrad_lr=natgrad_lr,
                    learn_ind_pts=learn_ind_pts,
                )
            else:
                # multiclass
                model = MultiClassVGP(
                    inducing_points,
                    kernel,
                    num_classes=num_outputs,
                    var_dist=var_dist,
                    err_fn=err_fn,
                    mb_size=batch_size,
                    num_data=num_samples,
                    num_epochs=num_iter,
                    use_cuda=True,
                    natgrad_lr=natgrad_lr,
                    lr=lr,
                    learn_ind_pts=learn_ind_pts,
                )
        else:
            if num_outputs != 1:
                raise NotImplementedError(
                    "Multi-output regression not yet implemented.")
            model = RegressionVGP(
                inducing_points,
                kernel,
                var_dist=var_dist,
                err_fn=err_fn,
                mb_size=batch_size,
                num_data=num_samples,
                num_epochs=num_iter,
                use_cuda=True,
                natgrad_lr=natgrad_lr,
                lr=lr,
                learn_ind_pts=learn_ind_pts,
            )
        return model

    if kfold == 1:
        # Load data
        load_fn = get_load_fn(dset)
        Xtr, Ytr, Xts, Yts, kwargs = load_fn(dtype=dtype.to_numpy_dtype(),
                                             as_torch=True)
        err_fns = [functools.partial(fn, **kwargs) for fn in err_fns]
        model = get_model(Xtr, Ytr.shape[1], err_fns[0])
        print("Starting to train model %s on data %s" % (model, dset),
              flush=True)
        t_s = time.time()
        model.do_train(Xtr, Ytr, Xts, Yts)
        print("Training of %s on %s complete in %.2fs" %
              (algorithm, dset, time.time() - t_s), flush=True)
        #print("Learned model parameters:")
        #print(dict(model.model.named_parameters()))
        #print()
        if isinstance(model, TwoClassVGP):
            # Need Ys in range [0, 1] for correct error calculation
            Yts = (Yts + 1) / 2
            Ytr = (Ytr + 1) / 2
        test_model(model, f"{algorithm} on {dset}", Xts, Yts, Xtr, Ytr, err_fns)
        #if ind_pt_file is not None:
        #    np.save(ind_pt_file,
        #            model.model.inducing_points.cpu().detach().numpy())
        #    print("Saved inducing points to %s" % (ind_pt_file))
    else:
        print("Will train GPyTorch on data %s with %d-fold CV" %
              (dset, kfold), flush=True)
        load_fn = get_cv_fn(dset)
        iteration = 0
        test_errs, train_errs = [], []
        for Xtr, Ytr, Xts, Yts, kwargs in load_fn(
                k=kfold, dtype=dtype.to_numpy_dtype(), as_torch=True):
            err_fns = [functools.partial(fn, **kwargs) for fn in err_fns]
            model = get_model(Xtr, Ytr.shape[1], err_fns[0])
            print("Starting GPyTorch fit (fold %d)" % (iteration))
            model.do_train(Xtr, Ytr, Xts, Yts)
            iteration += 1
            c_test_errs, c_train_errs = test_model(
                model, f"{algorithm} on {dset}", Xts, Yts, Xtr, Ytr, err_fns)
            train_errs.append(c_train_errs)
            test_errs.append(c_test_errs)

        print("Full errors: Test %s - Train %s" % (test_errs, train_errs))
        print()
        print("%d-Fold Error Report" % (kfold))
        for err_fn_i in range(len(err_fns)):
            print("Final test errors: %.4f +- %.4f" %
                  (np.mean([e[err_fn_i] for e in test_errs]),
                   np.std([e[err_fn_i] for e in test_errs])))
            print("Final train errors: %.4f +- %.4f" %
                  (np.mean([e[err_fn_i] for e in train_errs]),
                   np.std([e[err_fn_i] for e in train_errs])))
        print()
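

# Both run_gpflow and run_gpytorch draw inducing points by sampling training
# rows uniformly without replacement. A minimal self-contained sketch of that
# selection step (`select_inducing_points` is a hypothetical name, not used by
# the functions above):
def select_inducing_points(Xtr, num_centers, seed=0):
    """Return `num_centers` rows of Xtr, sampled uniformly without replacement.

    Example: select_inducing_points(np.random.randn(100, 3), 10).shape == (10, 3)
    """
    rng = np.random.default_rng(seed)
    idx = rng.choice(Xtr.shape[0], size=num_centers, replace=False)
    return Xtr[idx].reshape(num_centers, -1)
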