# --- Variant 1: selectable guide ('auto', 'easy', or 'custom') with periodic ELBO evaluation ---
def main(args):
    # load data
    print('loading training data...')
    dataset_directory = get_data_directory(__file__)
    dataset_path = os.path.join(dataset_directory, 'faces_training.csv')
    if not os.path.exists(dataset_path):
        try:
            os.makedirs(dataset_directory)
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise
        wget.download('https://d2hg8soec8ck9v.cloudfront.net/datasets/faces_training.csv',
                      dataset_path)
    data = torch.tensor(np.loadtxt(dataset_path, delimiter=',')).float()

    sparse_gamma_def = SparseGammaDEF()

    # Due to the special logic in the custom guide (e.g. parameter clipping), the custom guide
    # seems to be more amenable to higher learning rates.
    # Nevertheless, the easy guide performs the best (presumably because of numerical
    # instabilities related to the gamma distribution in the custom guide).
    learning_rate = 0.2 if args.guide in ['auto', 'easy'] else 4.5
    momentum = 0.05 if args.guide in ['auto', 'easy'] else 0.1
    opt = optim.AdagradRMSProp({"eta": learning_rate, "t": momentum})

    # use one of our three different guide types
    if args.guide == 'auto':
        guide = AutoDiagonalNormal(sparse_gamma_def.model, init_loc_fn=init_to_feasible)
    elif args.guide == 'easy':
        guide = MyEasyGuide(sparse_gamma_def.model)
    else:
        guide = sparse_gamma_def.guide

    # this is the svi object we use during training; we use TraceMeanField_ELBO to
    # get analytic KL divergences
    svi = SVI(sparse_gamma_def.model, guide, opt, loss=TraceMeanField_ELBO())
    # we use svi_eval during evaluation; since we took care to write down our model in
    # a fully vectorized way, this computation can be done efficiently with large tensor ops
    svi_eval = SVI(sparse_gamma_def.model, guide, opt,
                   loss=TraceMeanField_ELBO(num_particles=args.eval_particles,
                                            vectorize_particles=True))

    print('\nbeginning training with %s guide...' % args.guide)

    # the training loop
    for k in range(args.num_epochs):
        loss = svi.step(data)
        # for the custom guide we clip parameters after each gradient step
        if args.guide == 'custom':
            clip_params()

        if (k % args.eval_frequency == 0 and k > 0) or k == args.num_epochs - 1:
            loss = svi_eval.evaluate_loss(data)
            print("[epoch %04d] training elbo: %.4g" % (k, -loss))
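# The bare clip_params() used in the training loop above is defined elsewhere in
# the example. A minimal sketch of such a helper is shown below, assuming the
# custom guide stores its (log-space) parameters in the Pyro param store under
# names containing 'log_alpha' and 'log_mean'; those names and the clip values
# are illustrative assumptions, not the example's actual settings. The point of
# clipping is to keep the guide's gamma distributions away from numerically
# unstable regions with extremely small means.
def clip_params():
    # clamp each matching parameter in the global Pyro param store in place
    for substring, clip in [("log_alpha", -2.5), ("log_mean", -4.5)]:
        for name, p in pyro.get_param_store().named_parameters():
            if substring in name:
                p.data.clamp_(min=clip)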
# --- Variant 2: minimal version; custom guide only, Trace_ELBO, no separate evaluation ---
def main(args):
    # load data
    print('loading training data...')
    if not os.path.exists('faces_training.csv'):
        wget.download('https://d2fefpcigoriu7.cloudfront.net/datasets/faces_training.csv',
                      'faces_training.csv')
    data = torch.tensor(np.loadtxt('faces_training.csv', delimiter=',')).float()

    sparse_gamma_def = SparseGammaDEF()
    opt = optim.AdagradRMSProp({"eta": 4.5, "t": 0.1})
    svi = SVI(sparse_gamma_def.model, sparse_gamma_def.guide, opt, loss=Trace_ELBO())

    print('\nbeginning training...')

    # the training loop
    for k in range(args.num_epochs):
        loss = svi.step(data)
        # we clip params after each gradient step
        sparse_gamma_def.clip_params()

        if k % 20 == 0 and k > 0:
            print("[epoch %04d] training elbo: %.4g" % (k, -loss))
# --- Variant 3: auto vs. custom guide selected by a boolean flag ---
def main(args):
    # load data
    print('loading training data...')
    dataset_directory = get_data_directory(__file__)
    dataset_path = os.path.join(dataset_directory, 'faces_training.csv')
    if not os.path.exists(dataset_path):
        try:
            os.makedirs(dataset_directory)
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise
        wget.download('https://d2fefpcigoriu7.cloudfront.net/datasets/faces_training.csv',
                      dataset_path)
    data = torch.tensor(np.loadtxt(dataset_path, delimiter=',')).float()

    sparse_gamma_def = SparseGammaDEF()

    # due to the special logic in the custom guide (e.g. parameter clipping), the custom guide
    # is more numerically stable and enables us to use a larger learning rate (and consequently
    # achieves better results)
    learning_rate = 0.2 if args.auto_guide else 4.5
    momentum = 0.05 if args.auto_guide else 0.1
    opt = optim.AdagradRMSProp({"eta": learning_rate, "t": momentum})

    # either use an automatically constructed guide (see pyro.contrib.autoguide for details)
    # or our custom guide
    guide = (AutoDiagonalNormal(sparse_gamma_def.model) if args.auto_guide
             else sparse_gamma_def.guide)

    # this is the svi object we use during training; we use TraceMeanField_ELBO to
    # get analytic KL divergences
    svi = SVI(sparse_gamma_def.model, guide, opt, loss=TraceMeanField_ELBO())
    # we use svi_eval during evaluation; since we took care to write down our model in
    # a fully vectorized way, this computation can be done efficiently with large tensor ops
    svi_eval = SVI(sparse_gamma_def.model, guide, opt,
                   loss=TraceMeanField_ELBO(num_particles=args.eval_particles,
                                            vectorize_particles=True))

    guide_description = 'automatically constructed' if args.auto_guide else 'custom'
    print('\nbeginning training with %s guide...' % guide_description)

    # the training loop
    for k in range(args.num_epochs):
        loss = svi.step(data)
        if not args.auto_guide:
            # for the custom guide we clip parameters after each gradient step
            sparse_gamma_def.clip_params()

        if (k % args.eval_frequency == 0 and k > 0) or k == args.num_epochs - 1:
            loss = svi_eval.evaluate_loss(data)
            print("[epoch %04d] training elbo: %.4g" % (k, -loss))
# --- Variant 4: custom guide only; model, guide, model_original, and clip_params are
# assumed to be defined at module level ---
def main(args):
    # load data
    print('loading training data...')
    dataset_directory = get_data_directory(__file__)
    dataset_path = os.path.join(dataset_directory, 'faces_training.csv')
    if not os.path.exists(dataset_path):
        try:
            os.makedirs(dataset_directory)
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise
        wget.download('https://d2fefpcigoriu7.cloudfront.net/datasets/faces_training.csv',
                      dataset_path)
    data = torch.tensor(np.loadtxt(dataset_path, delimiter=',')).float()

    learning_rate = 4.5
    momentum = 0.1
    opt = optim.AdagradRMSProp({"eta": learning_rate, "t": momentum})

    # this is the svi object we use during training; we use TraceMeanField_ELBO to
    # get analytic KL divergences
    svi = SVI(model, guide, opt, loss=TraceMeanField_ELBO())
    # we use svi_eval during evaluation; since we took care to write down our model in
    # a fully vectorized way, this computation can be done efficiently with large tensor ops
    # (note that evaluation runs against model_original rather than model)
    svi_eval = SVI(model_original, guide, opt,
                   loss=TraceMeanField_ELBO(num_particles=args.eval_particles,
                                            vectorize_particles=True))

    guide_description = 'custom'
    print('\nbeginning training with %s guide...' % guide_description)

    # the training loop
    for k in range(args.num_epochs):
        loss = svi.step(data)
        # clip parameters after each gradient step
        clip_params()

        if (k % args.eval_frequency == 0 and k > 0) or k == args.num_epochs - 1:
            loss = svi_eval.evaluate_loss(data)
            print("[epoch %04d] training elbo: %.4g" % (k, -loss))
# --- Variant 5: instrumented version with logging and timing; model and guide are passed in ---
def main(model, guide, args):
    # init
    if args.seed is not None:
        pyro.set_rng_seed(args.seed)
    logger = get_logger(args.log, __name__)
    logger.info(args)
    torch.set_default_tensor_type('torch.FloatTensor')
    # torch.set_default_tensor_type('torch.DoubleTensor')

    # load data
    dataset_directory = get_data_directory(__file__)
    dataset_path = os.path.join(dataset_directory, 'faces_training.csv')
    if not os.path.exists(dataset_path):
        try:
            os.makedirs(dataset_directory)
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise
        wget.download('https://d2hg8soec8ck9v.cloudfront.net/datasets/faces_training.csv',
                      dataset_path)
    data = torch.tensor(np.loadtxt(dataset_path, delimiter=',')).float()

    # setup svi
    pyro.clear_param_store()
    # # WL: edited to make SCORE behave well. =====
    # # (lr, mmt) values:
    # #   - original values in sgdef      : (4.5, 0.1) --- SCORE decreases ELBO.
    # #   - default of AdagradRMSProp(..) : (1.0, 0.1) --- SCORE decreases ELBO (from iter 200).
    # #   - current values                : (0.1, 0.1) --- SCORE behaves well.
    # learning_rate = 0.1
    # momentum = 0.1
    # # ===========================================
    opt = optim.AdagradRMSProp({"eta": args.learning_rate, "t": args.momentum})
    # elbo = TraceMeanField_ELBO()
    elbo = Trace_ELBO()
    # model and guide are passed in as objects that expose the model/guide callables as .main
    svi = SVI(model.main, guide.main, opt, loss=elbo)
    svi_arg_l = [data]

    # # train (init)
    # loss = svi.evaluate_loss(*svi_arg_l)
    # param_state = copy.deepcopy(pyro.get_param_store().get_state())
    # elbo_l = [-loss]
    # param_state_l = [param_state]

    # train
    times = [time.time()]
    logger.info("\nepoch\telbo\ttime(sec)")

    for i in range(1, args.num_epochs + 1):
        loss = svi.step(*svi_arg_l)
        # elbo_l.append(-loss)
        clip_params()
        # if (i+1) % param_freq == 0:
        #     param_state = copy.deepcopy(pyro.get_param_store().get_state())
        #     param_state_l.append(param_state)

        if (args.eval_frequency > 0 and i % args.eval_frequency == 0) or (i == 1):
            times.append(time.time())
            logger.info(f"{i:06d}\t"
                        f"{-loss:.4f}\t"
                        f"{times[-1] - times[-2]:.3f}")
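# None of the variants above include an entry point. Below is a minimal sketch
# of one, written against the main(args) signature of the first four variants
# (variant 5 takes model and guide explicitly, and variant 3 would use a boolean
# --auto-guide flag instead of --guide). The flag names and defaults here are
# assumptions chosen to match the attributes the code reads (args.num_epochs,
# args.eval_frequency, args.eval_particles, args.guide), not necessarily the
# example's actual command-line interface. The variants additionally assume
# module-level imports along the lines of: os, errno, time, wget, numpy as np,
# torch, pyro, pyro.optim as optim, SVI / Trace_ELBO / TraceMeanField_ELBO from
# pyro.infer, AutoDiagonalNormal / init_to_feasible from pyro.infer.autoguide,
# get_data_directory from pyro.contrib.examples.util, and the helpers
# (SparseGammaDEF, MyEasyGuide, clip_params, get_logger) defined in the
# surrounding example code.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description='sparse gamma deep exponential family')
    parser.add_argument('-n', '--num-epochs', default=1000, type=int,
                        help='number of training epochs')
    parser.add_argument('-ef', '--eval-frequency', default=25, type=int,
                        help='how often (in epochs) to evaluate the ELBO')
    parser.add_argument('-ep', '--eval-particles', default=20, type=int,
                        help='number of particles for the vectorized evaluation ELBO')
    parser.add_argument('--guide', default='custom', type=str,
                        choices=['custom', 'auto', 'easy'],
                        help='which guide to use')
    args = parser.parse_args()
    main(args)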