def load_xydataset_from_file(path, dtype=torch.float32, batch_size=None, shuffle=False):
    """Load an ``XYDataset`` from file and wrap it in a ``DataLoader``.

    Parameters
    ----------
    path : path understood by ``XYDataset.from_file``.
    dtype : torch dtype the loaded data are cast to (default ``torch.float32``).
    batch_size : mini-batch size; ``None`` (the default) means full-batch,
        i.e. one batch containing the whole dataset.
    shuffle : whether the DataLoader reshuffles the data at every epoch.

    Returns
    -------
    tuple
        ``(dataset, dataloader)``.
    """
    dataset = XYDataset.from_file(path=path, dtype=dtype)
    # Explicit None check instead of `batch_size or len(dataset)`: the old
    # truthiness test would silently replace a falsy-but-provided value
    # (e.g. 0) with full-batch instead of surfacing the caller's mistake.
    if batch_size is None:
        batch_size = len(dataset)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)
    return dataset, dataloader
# Load covariates and labels from CSV files under `data_path`.
x = pd.read_csv(data_path.joinpath('x.csv'))
y = pd.read_csv(data_path.joinpath('y.csv'))

# %% Split data to training and test subsets

# Stratify on y so class proportions are preserved in both subsets.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.33, random_state=5000, stratify=y)

# %% Create training dataset

training_dataset = XYDataset(
    torch.from_numpy(x_train.to_numpy(dtype=torch_to_np_types[dtype])),
    torch.from_numpy(y_train.to_numpy(dtype=torch_to_np_types[dtype])))

# Standardize covariates using *training* statistics only. Computing the
# mean/std once here lets the test set be transformed with the same
# parameters below, keeping the two subsets on a common scale and avoiding
# leakage of test information into the preprocessing.
training_x_mean = torch.mean(training_dataset.x, dim=0, keepdim=True)
training_x_std = torch.std(training_dataset.x, dim=0, keepdim=True, unbiased=False)

training_dataset.x = (training_dataset.x - training_x_mean) / training_x_std

# %% Create test dataset

test_dataset = XYDataset(
    torch.from_numpy(x_test.to_numpy(dtype=torch_to_np_types[dtype])),
    torch.from_numpy(y_test.to_numpy(dtype=torch_to_np_types[dtype])))

# NOTE(review): the original standardized the test set with its own
# mean/std; reusing the training statistics is the standard, leak-free
# convention (cf. sklearn's fit-on-train / transform-on-test pattern).
test_dataset.x = (test_dataset.x - training_x_mean) / training_x_std
import unittest

from torch.autograd import grad
from torch.distributions import Normal
from torch.utils.data import DataLoader

from eeyore.constants import loss_functions
from eeyore.datasets import XYDataset
from eeyore.models.mlp import Hyperparameters, MLP
from eeyore.stats import binary_cross_entropy

# NOTE(review): `torch` is used below but not imported in this view —
# confirm it is imported elsewhere in the file.

# %% Compute MLP log-target using eeyore API version

# Load XOR data

xor = XYDataset.from_eeyore('xor', dtype=torch.float64)
data = xor.x
labels = xor.y
# batch_size=4 with no shuffling: one deterministic batch holding all four
# XOR input/output pairs.
dataloader = DataLoader(xor, batch_size=4, shuffle=False)

# Setup MLP model

# 2-2-1 architecture; with biases this gives (2*2+2) + (2*1+1) = 9
# parameters, matching the prior dimension below — TODO confirm against
# the eeyore Hyperparameters/MLP definitions.
hparams = Hyperparameters([2, 2, 1])
model = MLP(loss=loss_functions['binary_classification'], hparams=hparams, dtype=torch.float64)
# Independent Normal(0, 100) prior on each of the 9 parameters
# (second argument of Normal is the scale, i.e. the standard deviation).
model.prior = Normal(torch.zeros(9, dtype=torch.float64), 100 * torch.ones(9, dtype=torch.float64))
from datetime import timedelta
from timeit import default_timer as timer

from torch.distributions import Normal
from torch.utils.data import DataLoader

import kanga.plots as ps

from eeyore.datasets import XYDataset
from eeyore.models import logistic_regression
from eeyore.samplers import RAM
from eeyore.stats import binary_cross_entropy

# NOTE(review): `torch` is used below but not imported in this view —
# confirm it is imported elsewhere in the file.

# %% Load and standardize Swiss banknote data

banknotes = XYDataset.from_eeyore('banknotes', dtype=torch.float32)
# Keep only the first four covariate columns.
banknotes.x = banknotes.x[:, :4]
# Standardize each covariate to zero mean and unit (population) std;
# unbiased=False divides by N rather than N-1.
banknotes.x = \
    (banknotes.x - torch.mean(banknotes.x, dim=0, keepdim=True))/ \
    torch.std(banknotes.x, dim=0, keepdim=True, unbiased=False)

# Full-batch dataloader: a single batch containing the whole dataset.
dataloader = DataLoader(banknotes, batch_size=len(banknotes))

# %% Setup logistic regression model

hparams = logistic_regression.Hyperparameters(input_size=4, bias=False)
model = logistic_regression.LogisticRegression(
    # Summed (not mean) binary cross-entropy, so the loss scales with
    # batch size — consistent with full-batch log-target evaluation.
    loss=lambda x, y: binary_cross_entropy(x, y, reduction='sum'),
    hparams=hparams,
    dtype=torch.float32)
# NOTE(review): statement below is truncated at this chunk boundary; the
# remainder (the prior's scale argument) is outside this view.
model.prior = Normal(torch.zeros(model.num_params(), dtype=model.dtype),
import kanga.plots as ps

from eeyore.constants import loss_functions
from eeyore.datasets import XYDataset
from eeyore.models import mlp
from eeyore.samplers import MALA

# NOTE(review): `plt`, `torch`, `DataLoader` and `Normal` are used below
# but not imported in this view — confirm they are imported elsewhere.

# %% Avoid issuing memory warning due to number of plots

plt.rcParams.update({'figure.max_open_warning': 0})

# %% Load Iris data

# yonehot=True: labels are loaded one-hot encoded for the 3-class problem.
iris = XYDataset.from_eeyore('iris', yndmin=1, dtype=torch.float32, yonehot=True)
# Full-batch dataloader over the whole Iris dataset, reshuffled each epoch.
dataloader = DataLoader(iris, batch_size=len(iris), shuffle=True)

# %% Setup MLP model

# 4-3-3 architecture: sigmoid on the hidden layer, no activation on the
# output layer (the multiclass loss presumably applies softmax — TODO
# confirm against eeyore's loss_functions definition).
hparams = mlp.Hyperparameters(dims=[4, 3, 3], activations=[torch.sigmoid, None])
model = mlp.MLP(loss=loss_functions['multiclass_classification'], hparams=hparams, dtype=torch.float32)
# Independent Normal(0, sqrt(3)) prior on every parameter: variance 3,
# so the scale argument is sqrt(3).
model.prior = Normal(
    torch.zeros(model.num_params(), dtype=model.dtype),
    (3 * torch.ones(model.num_params(), dtype=model.dtype)).sqrt())

# %% Setup MALA sampler
# %% Import packages from torch.utils.data import DataLoader from eeyore.datasets import XYDataset from bnn_mcmc_examples.examples.mlp.exact_xor.constants import dtype # %% Load dataloader dataset = XYDataset.from_eeyore('xor', dtype=dtype) dataloader = DataLoader(dataset, batch_size=len(dataset))
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.33, random_state=7000, stratify=y) # %% Drop covariate 'year' x_train = x_train.drop(['year'], axis=1) x_test = x_test.drop(['year'], axis=1) # %% Create training dataset training_dataset = XYDataset( torch.from_numpy(x_train.to_numpy(dtype=torch_to_np_types[dtype])), torch.from_numpy(y_train.to_numpy(dtype=torch_to_np_types[dtype]))) training_dataset.x = \ (training_dataset.x - torch.mean(training_dataset.x, dim=0, keepdim=True))/ \ torch.std(training_dataset.x, dim=0, keepdim=True, unbiased=False) training_dataset.y = one_hot(training_dataset.y.squeeze(-1).long()).to( training_dataset.y.dtype) # %% Create test dataset test_dataset = XYDataset( torch.from_numpy(x_test.to_numpy(dtype=torch_to_np_types[dtype])), torch.from_numpy(y_test.to_numpy(dtype=torch_to_np_types[dtype])))
# https://discuss.pytorch.org/t/mnist-normalization/49080/2 # https://gist.github.com/kdubovikov/eb2a4c3ecadd5295f68c126542e59f0a training_dataset = datasets.MNIST(root=data_root, train=True, download=False, transform=None) # print("Mean = ", training_dataset.data.float().mean() / 255) # print("Std = ", training_dataset.data.float().std() / 255) # Mean = tensor(0.1307) # Std = tensor(0.3081) training_dataset = XYDataset( training_dataset.data.to(dtype).reshape( training_dataset.data.shape[0], training_dataset.data.shape[1] * training_dataset.data.shape[2]), training_dataset.targets.to(dtype)[:, None]) training_dataset.x = (training_dataset.x - training_dataset.x.mean()) / training_dataset.x.std() training_dataset.y = one_hot(training_dataset.y.squeeze(-1).long()).to( training_dataset.y.dtype) # %% Create test dataset test_dataset = datasets.MNIST(root=data_root, train=False, download=False) test_dataset = XYDataset( test_dataset.data.to(dtype).reshape(