def main(args):
    # load data
    print('loading training data...')
    dataset_directory = get_data_directory(__file__)
    dataset_path = os.path.join(dataset_directory, 'faces_training.csv')
    if not os.path.exists(dataset_path):
        try:
            os.makedirs(dataset_directory)
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise
            pass
        wget.download(
            'https://d2hg8soec8ck9v.cloudfront.net/datasets/faces_training.csv',
            dataset_path)
    data = torch.tensor(np.loadtxt(dataset_path, delimiter=',')).float()

    sparse_gamma_def = SparseGammaDEF()

    # Due to the special logic in the custom guide (e.g. parameter clipping), the custom guide
    # seems to be more amenable to higher learning rates.
    # Nevertheless, the easy guide performs the best (presumably because of numerical instabilities
    # related to the gamma distribution in the custom guide).
    learning_rate = 0.2 if args.guide in ['auto', 'easy'] else 4.5
    momentum = 0.05 if args.guide in ['auto', 'easy'] else 0.1
    opt = optim.AdagradRMSProp({"eta": learning_rate, "t": momentum})

    # use one of our three different guide types
    if args.guide == 'auto':
        guide = AutoDiagonalNormal(sparse_gamma_def.model, init_loc_fn=init_to_feasible)
    elif args.guide == 'easy':
        guide = MyEasyGuide(sparse_gamma_def.model)
    else:
        guide = sparse_gamma_def.guide

    # this is the svi object we use during training; we use TraceMeanField_ELBO to
    # get analytic KL divergences
    svi = SVI(sparse_gamma_def.model, guide, opt, loss=TraceMeanField_ELBO())

    # we use svi_eval during evaluation; since we took care to write down our model in
    # a fully vectorized way, this computation can be done efficiently with large tensor ops
    svi_eval = SVI(sparse_gamma_def.model, guide, opt,
                   loss=TraceMeanField_ELBO(num_particles=args.eval_particles,
                                            vectorize_particles=True))

    print('\nbeginning training with %s guide...' % args.guide)

    # the training loop
    for k in range(args.num_epochs):
        loss = svi.step(data)

        # for the custom guide we clip parameters after each gradient step
        if args.guide == 'custom':
            clip_params()

        if k % args.eval_frequency == 0 and k > 0 or k == args.num_epochs - 1:
            loss = svi_eval.evaluate_loss(data)
            print("[epoch %04d] training elbo: %.4g" % (k, -loss))
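# clip_params() is called in the training loop above but is not defined in this snippet.
# A minimal sketch of such a clipping step, assuming (as elsewhere in these snippets) that
# pyro is imported and that the custom guide registers its gamma variational parameters in
# the Pyro param store under names containing 'log'; the name filter and the lower bound
# are assumptions for illustration, not the example's actual values.
def clip_params():
    for name, value in pyro.get_param_store().items():
        if 'log' in name:
            # clamp in place to keep the gamma parameters away from degenerate regions
            value.data.clamp_(min=-2.5)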
def setup_data_loaders(dataset, use_cuda, batch_size, sup_num=None, root=None,
                       download=True, **kwargs):
    """
    helper function for setting up pytorch data loaders for a semi-supervised dataset

    :param dataset: the data to use
    :param use_cuda: use GPU(s) for training
    :param batch_size: size of a batch of data to output when iterating over the data loaders
    :param sup_num: number of supervised data examples
    :param root: where on the filesystem the data should live
    :param download: download the dataset (if it doesn't exist already)
    :param kwargs: other params for the pytorch data loader
    :return: three data loaders: (supervised data for training, un-supervised data for training,
                                  supervised data for testing)
    """
    # instantiate the dataset as training/testing sets
    if root is None:
        root = get_data_directory(__file__)
    if 'num_workers' not in kwargs:
        kwargs = {'num_workers': 0, 'pin_memory': False}

    cached_data = {}
    loaders = {}
    for mode in ["unsup", "test", "sup", "valid"]:
        if sup_num is None and mode == "sup":
            # in this special case, we do not want "sup" and "valid" data loaders
            return loaders["unsup"], loaders["test"]
        cached_data[mode] = dataset(root=root, mode=mode, download=download,
                                    sup_num=sup_num, use_cuda=use_cuda)
        loaders[mode] = DataLoader(cached_data[mode], batch_size=batch_size,
                                   shuffle=True, **kwargs)

    return loaders
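# A hypothetical usage sketch of setup_data_loaders; MNISTCached and the sup_num value
# below are assumptions for illustration, not part of the snippet above.
#
#     data_loaders = setup_data_loaders(MNISTCached, use_cuda=True, batch_size=200,
#                                       sup_num=3000, download=True)
#     xs, ys = next(iter(data_loaders["sup"]))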
def load_data():
    inpath = get_data_directory(__file__)
    (X_np, Y), _ = multi_mnist(inpath, max_digits=2, canvas_size=50, seed=42)
    X_np = X_np.astype(np.float32)
    X_np /= 255.0
    X = torch.from_numpy(X_np)
    counts = torch.FloatTensor([len(objs) for objs in Y])
    return X, counts
def load_data():
    inpath = get_data_directory(__file__)
    X_np, Y = multi_mnist.load(inpath)
    X_np = X_np.astype(np.float32)
    X_np /= 255.0
    X = torch.from_numpy(X_np)
    counts = torch.FloatTensor([len(objs) for objs in Y])
    return X, counts
def load_data():
    inpath = get_data_directory(__file__)
    X_np, Y = multi_mnist.load(inpath)
    X_np = X_np.astype(np.float32)
    X_np /= 255.0
    X = torch.from_numpy(X_np)
    # Using FloatTensor to allow comparison with values sampled from
    # Bernoulli.
    counts = torch.FloatTensor([len(objs) for objs in Y])
    return X, counts
def load_data():
    inpath = get_data_directory(__file__)
    (X_np, Y), _ = multi_mnist(inpath, max_digits=2, canvas_size=50, seed=42)
    X_np = X_np.astype(np.float32)
    X_np /= 255.0
    X = torch.from_numpy(X_np)
    # Using FloatTensor to allow comparison with values sampled from
    # Bernoulli.
    counts = torch.FloatTensor([len(objs) for objs in Y])
    return X, counts
def main(args):
    data_dir = args.data_dir if args.data_dir is not None else get_data_directory(__file__)
    train_loader = get_data_loader(dataset_name='MNIST',
                                   data_dir=data_dir,
                                   batch_size=args.batch_size,
                                   dataset_transforms=[transforms.Normalize((0.1307,), (0.3081,))],
                                   is_training_set=True,
                                   shuffle=True)
    test_loader = get_data_loader(dataset_name='MNIST',
                                  data_dir=data_dir,
                                  batch_size=args.batch_size,
                                  dataset_transforms=[transforms.Normalize((0.1307,), (0.3081,))],
                                  is_training_set=False,
                                  shuffle=True)

    cnn = CNN()

    # Create a deep kernel by warping the RBF kernel with the CNN.
    # The CNN maps a high-dimensional image to a low-dimensional 2D tensor for the RBF kernel,
    # so the resulting kernel takes CNN inputs and returns the RBF covariance matrix computed
    # on the CNN outputs.
    rbf = gp.kernels.RBF(input_dim=10, lengthscale=torch.ones(10))
    deep_kernel = gp.kernels.Warping(rbf, iwarping_fn=cnn)

    # initialize inducing points (taken randomly from the dataset)
    Xu = next(iter(train_loader))[0][:args.num_inducing]

    # use the MultiClass likelihood for the 10-class classification problem
    likelihood = gp.likelihoods.MultiClass(num_classes=10)

    # Because the MultiClass likelihood uses a Categorical distribution, the GP model must
    # return a vector of probabilities for each class; hence latent_shape = 10 is required.
    # Turning on the "whiten" flag helps optimization for variational models.
    gpmodule = gp.models.VariationalSparseGP(X=Xu, y=None, kernel=deep_kernel, Xu=Xu,
                                             likelihood=likelihood,
                                             latent_shape=torch.Size([10]),
                                             num_data=60000, whiten=True)
    if args.cuda:
        gpmodule.cuda()

    optimizer = torch.optim.Adam(gpmodule.parameters(), lr=args.lr)

    elbo = infer.JitTraceMeanField_ELBO() if args.jit else infer.TraceMeanField_ELBO()
    loss_fn = elbo.differentiable_loss

    for epoch in range(1, args.epochs + 1):
        start_time = time.time()
        train(args, train_loader, gpmodule, optimizer, loss_fn, epoch)
        with torch.no_grad():
            test(args, test_loader, gpmodule)
        print("Amount of time spent for epoch {}: {}s\n"
              .format(epoch, int(time.time() - start_time)))
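# train() and test() are called above but not defined in this snippet. A minimal sketch of
# what one training epoch might look like, assuming loss_fn is elbo.differentiable_loss and
# that gpmodule exposes the usual pyro.contrib.gp set_data()/model/guide interface; the
# 784-dimensional flattening is an assumption based on the MNIST image size.
def train(args, train_loader, gpmodule, optimizer, loss_fn, epoch):
    for data, target in train_loader:
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        # flatten images into vectors before they are fed to the CNN warping
        data = data.reshape(-1, 784)
        gpmodule.set_data(data, target)
        optimizer.zero_grad()
        loss = loss_fn(gpmodule.model, gpmodule.guide)
        loss.backward()
        optimizer.step()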
def main(args):
    # load data
    print('loading training data...')
    dataset_directory = get_data_directory(__file__)
    dataset_path = os.path.join(dataset_directory, 'faces_training.csv')
    if not os.path.exists(dataset_path):
        try:
            os.makedirs(dataset_directory)
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise
            pass
        wget.download('https://d2fefpcigoriu7.cloudfront.net/datasets/faces_training.csv',
                      dataset_path)
    data = torch.tensor(np.loadtxt(dataset_path, delimiter=',')).float()

    sparse_gamma_def = SparseGammaDEF()

    # due to the special logic in the custom guide (e.g. parameter clipping), the custom guide
    # is more numerically stable and enables us to use a larger learning rate (and consequently
    # achieves better results)
    learning_rate = 0.2 if args.auto_guide else 4.5
    momentum = 0.05 if args.auto_guide else 0.1
    opt = optim.AdagradRMSProp({"eta": learning_rate, "t": momentum})

    # either use an automatically constructed guide (see pyro.contrib.autoguide for details)
    # or our custom guide
    guide = AutoDiagonalNormal(sparse_gamma_def.model) if args.auto_guide else sparse_gamma_def.guide

    # this is the svi object we use during training; we use TraceMeanField_ELBO to
    # get analytic KL divergences
    svi = SVI(sparse_gamma_def.model, guide, opt, loss=TraceMeanField_ELBO())

    # we use svi_eval during evaluation; since we took care to write down our model in
    # a fully vectorized way, this computation can be done efficiently with large tensor ops
    svi_eval = SVI(sparse_gamma_def.model, guide, opt,
                   loss=TraceMeanField_ELBO(num_particles=args.eval_particles,
                                            vectorize_particles=True))

    guide_description = 'automatically constructed' if args.auto_guide else 'custom'
    print('\nbeginning training with %s guide...' % guide_description)

    # the training loop
    for k in range(args.num_epochs):
        loss = svi.step(data)

        if not args.auto_guide:
            # for the custom guide we clip parameters after each gradient step
            sparse_gamma_def.clip_params()

        if k % args.eval_frequency == 0 and k > 0 or k == args.num_epochs - 1:
            loss = svi_eval.evaluate_loss(data)
            print("[epoch %04d] training elbo: %.4g" % (k, -loss))
def main(args):
    # load data
    print('loading training data...')
    dataset_directory = get_data_directory(__file__)
    dataset_path = os.path.join(dataset_directory, 'faces_training.csv')
    if not os.path.exists(dataset_path):
        try:
            os.makedirs(dataset_directory)
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise
            pass
        wget.download(
            'https://d2fefpcigoriu7.cloudfront.net/datasets/faces_training.csv',
            dataset_path)
    data = torch.tensor(np.loadtxt(dataset_path, delimiter=',')).float()

    learning_rate = 4.5
    momentum = 0.1
    opt = optim.AdagradRMSProp({"eta": learning_rate, "t": momentum})

    # this is the svi object we use during training; we use TraceMeanField_ELBO to
    # get analytic KL divergences
    svi = SVI(model, guide, opt, loss=TraceMeanField_ELBO())

    # we use svi_eval during evaluation; since we took care to write down our model in
    # a fully vectorized way, this computation can be done efficiently with large tensor ops
    svi_eval = SVI(model_original, guide, opt,
                   loss=TraceMeanField_ELBO(num_particles=args.eval_particles,
                                            vectorize_particles=True))

    guide_description = 'custom'
    print('\nbeginning training with %s guide...' % guide_description)

    # the training loop
    for k in range(args.num_epochs):
        loss = svi.step(data)
        clip_params()

        if k % args.eval_frequency == 0 and k > 0 or k == args.num_epochs - 1:
            loss = svi_eval.evaluate_loss(data)
            print("[epoch %04d] training elbo: %.4g" % (k, -loss))
            seq_length = len(data_split[seq])
            processed_dataset[split]['sequence_lengths'][seq] = seq_length
            processed_sequence = torch.zeros((seq_length, note_range))
            for t in range(seq_length):
                note_slice = torch.tensor(list(data_split[seq][t])) - min_note
                slice_length = len(note_slice)
                if slice_length > 0:
                    processed_sequence[t, note_slice] = torch.ones(slice_length)
            processed_dataset[split]['sequences'].append(processed_sequence)
    pickle.dump(processed_dataset, open(output, "wb"), pickle.HIGHEST_PROTOCOL)
    print("dumped processed data to %s" % output)


# this logic will be initiated upon import
base_path = get_data_directory(__file__)
if not os.path.exists(base_path):
    os.mkdir(base_path)


# ingest training/validation/test data from disk
def load_data(dataset):
    # download and process dataset if it does not exist
    process_data(base_path, dataset)
    file_loc = os.path.join(base_path, dataset.filename)

    with open(file_loc, "rb") as f:
        dset = pickle.load(f)
        for k, v in dset.items():
            sequences = v["sequences"]
            # === wy: force seq_len to be MAX_T
            seq_new = []
import argparse
import csv
import datetime
import logging
import multiprocessing
import os
import subprocess
import sys
import urllib

import torch

from pyro.contrib.examples.util import get_data_directory

DATA = get_data_directory(__file__)

# https://www.bart.gov/about/reports/ridership
SOURCE_DIR = "http://64.111.127.166/origin-destination/"
SOURCE_FILES = [
    "date-hour-soo-dest-2011.csv.gz",
    "date-hour-soo-dest-2012.csv.gz",
    "date-hour-soo-dest-2013.csv.gz",
    "date-hour-soo-dest-2014.csv.gz",
    "date-hour-soo-dest-2015.csv.gz",
    "date-hour-soo-dest-2016.csv.gz",
    "date-hour-soo-dest-2017.csv.gz",
    "date-hour-soo-dest-2018.csv.gz",
]
CACHE_URL = "https://d2hg8soec8ck9v.cloudfront.net/datasets/bart_full.pkl.bz2"
def main(model, guide, args):
    # init
    if args.seed is not None:
        pyro.set_rng_seed(args.seed)
    logger = get_logger(args.log, __name__)
    logger.info(args)
    torch.set_default_tensor_type('torch.FloatTensor')
    # torch.set_default_tensor_type('torch.DoubleTensor')

    # load data
    dataset_directory = get_data_directory(__file__)
    dataset_path = os.path.join(dataset_directory, 'faces_training.csv')
    if not os.path.exists(dataset_path):
        try:
            os.makedirs(dataset_directory)
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise
        wget.download('https://d2hg8soec8ck9v.cloudfront.net/datasets/faces_training.csv',
                      dataset_path)
    data = torch.tensor(np.loadtxt(dataset_path, delimiter=',')).float()

    # setup svi
    pyro.clear_param_store()
    # # WL: edited to make SCORE behave well. =====
    # # (lr, mmt) values:
    # #   - original values in sgdef      : (4.5, 0.1) --- SCORE decreases ELBO.
    # #   - default of AdagradRMSProp(..) : (1.0, 0.1) --- SCORE decreases ELBO (from iter 200).
    # #   - current values                : (0.1, 0.1) --- SCORE behaves well.
    # learning_rate = 0.1
    # momentum = 0.1
    # # ===========================================
    opt = optim.AdagradRMSProp({"eta": args.learning_rate, "t": args.momentum})
    # elbo = TraceMeanField_ELBO()
    elbo = Trace_ELBO()
    # this is the svi object we use during training; we use TraceMeanField_ELBO to
    # get analytic KL divergences
    svi = SVI(model.main, guide.main, opt, loss=elbo)
    svi_arg_l = [data]

    # # train (init)
    # loss = svi.evaluate_loss(*svi_arg_l)
    # param_state = copy.deepcopy(pyro.get_param_store().get_state())
    # elbo_l = [-loss]
    # param_state_l = [param_state]

    # train
    times = [time.time()]
    logger.info("\nepoch\t" + "elbo\t" + "time(sec)")

    for i in range(1, args.num_epochs + 1):
        loss = svi.step(*svi_arg_l)
        # elbo_l.append(-loss)
        clip_params()

        # if (i+1) % param_freq == 0:
        #     param_state = copy.deepcopy(pyro.get_param_store().get_state())
        #     param_state_l.append(param_state)

        if (args.eval_frequency > 0 and i % args.eval_frequency == 0) or (i == 1):
            times.append(time.time())
            logger.info(f"{i:06d}\t"
                        f"{-loss:.4f}\t"
                        f"{times[-1] - times[-2]:.3f}")
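# get_logger() is used above but not defined in this snippet. A minimal sketch, assuming it
# simply attaches a stdout handler (and optionally a file handler) to a named logger; the
# handler setup below is an assumption for illustration, not the example's actual helper.
import logging
import sys

def get_logger(log_file, name):
    logger = logging.getLogger(name)
    logger.setLevel(logging.INFO)
    logger.addHandler(logging.StreamHandler(sys.stdout))
    if log_file is not None:
        # also mirror output to the given log file
        logger.addHandler(logging.FileHandler(log_file))
    return logger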