# Module-level context assumed by the functions in this excerpt (not shown in
# the original file): standard imports and a module logger.
import os
import logging

import numpy as np

logger = logging.getLogger(__name__)


def run_experiment(args):
    # set environment variable for theano
    os.environ['THEANO_FLAGS'] = "device=gpu" + str(args.gpu)

    from learning.experiment import Experiment
    from learning.training import DSGNHTTrainer
    from learning.termination import EarlyStopping
    from learning.monitor import MonitorLL, DLogModelParams, SampleFromP, MonitorPosteriorMean
    from learning.dataset import Omniglot
    from learning.preproc import PermuteColumns, Binarize
    from learning.models.dgm_vae import VAEDGMLayerStack
    from learning.models.vae import VAE, StandardVAETop

    np.set_printoptions(precision=2)

    logger.debug("Arguments %s" % args)
    tags = []

    np.random.seed(23)

    # n_samples to evaluate model
    n_samples_epoch = [1, 5, 25, 100]
    n_samples_final = [1, 5, 10, 25, 100, 500, 1000, 5000]

    # no. posterior samples for posterior mean
    postsamples = [int(s) for s in args.postsamples.split(",")]

    n_X = 28 * 28

    p_layers = []
    q_layers = []
    model = None

    # build the variational auto-encoder layer stack
    if args.layers == 1:
        latent_units = [50]
        hidden_units_q = [[200, 200]]
        hidden_units_p = [[200, 200]]

        n_Y = latent_units[0]
        p_layers.append(
            VAE(n_X=n_X, n_Y=n_Y, det_units=hidden_units_p[0],
                data_type="binary", bias=None))
        p_layers.append(StandardVAETop(n_X=n_Y))
        q_layers.append(
            VAE(n_X=n_Y, n_Y=n_X, det_units=hidden_units_q[0],
                data_type="continuous", bias=None))

        model = VAEDGMLayerStack(
            p_layers=p_layers,
            q_layers=q_layers,
        )
        model.setup()
    elif args.layers == 2:
        latent_units = [100, 50]
        hidden_units_p = [[200, 200], [100, 100]]
        hidden_units_q = [[100, 100], [200, 200]]

        n_Y = latent_units[0]
        p_layers.append(
            VAE(n_X=n_X, n_Y=latent_units[0], det_units=hidden_units_p[0],
                data_type="binary", bias=None))
        q_layers.append(
            VAE(n_X=latent_units[0], n_Y=n_X, det_units=hidden_units_q[1],
                data_type="continuous", bias=None))

        p_layers.append(
            VAE(n_X=latent_units[0], n_Y=latent_units[1],
                det_units=hidden_units_p[1], data_type="continuous", bias=None))
        q_layers.append(
            VAE(n_X=latent_units[1], n_Y=latent_units[0],
                det_units=hidden_units_q[0], data_type="continuous", bias=None))

        p_layers.append(StandardVAETop(n_X=latent_units[1]))

        model = VAEDGMLayerStack(
            p_layers=p_layers,
            q_layers=q_layers,
        )
        model.setup()
    assert model is not None

    # parameters
    def param_tag(value):
        """ Convert a float into a short tag-usable string representation.
        E.g.:  0.1   -> 11
               0.01  -> 12
               0.001 -> 13
               0.005 -> 53
        """
        if value == 0.0:
            return "00"
        exp = np.floor(np.log10(value))
        leading = ("%e" % value)[0]
        return "%s%d" % (leading, -exp)

    # Learning rates
    lr_p = args.lr_p
    tags += ["lp" + param_tag(lr_p)]

    lr_q = args.lr_q
    tags += ["lq" + param_tag(lr_q)]

    # LR decay
    if args.lrdecay != 1.0:
        tags += ["lrdecay" + param_tag(args.lrdecay - 1.)]

    # Samples
    n_samples = args.samples
    tags += ["spl%d" % n_samples]

    # Batch size
    batch_size = args.batchsize
    tags += ["bs%d" % batch_size]

    # n_steps_simu
    n_steps_simu = args.n_simu
    tags += ["ns%d" % n_steps_simu]

    # n_steps_optm
    n_steps_optm = args.n_optm
    tags += ["no%d" % n_steps_optm]

    # momentum_decay
    momentum_decay = args.momentum_decay
    tags += ["md" + param_tag(momentum_decay)]

    # Dataset
    if args.shuffle:
        np.random.seed(23)
        preproc = [PermuteColumns()]
        tags += ["shuffle"]
    else:
        preproc = []

    binarize_preproc = preproc + [Binarize(late=True)]
    dataset = Omniglot(which_set='train', preproc=binarize_preproc)
    valiset = Omniglot(which_set='valid', preproc=binarize_preproc)
    testset = Omniglot(which_set='test', preproc=binarize_preproc)

    # lookahead
    lookahead = args.lookahead
    tags += ["lah%d" % lookahead]

    tags.sort()
    expname = "dsgnht-%s-%slayer" % ("-".join(tags), str(args.layers))
    if args.report:
        expname = "report/" + expname

    logger.info("Running %s" % expname)

    #-----------------------------------------------------------------------------
    dlog_model_params_monitor = DLogModelParams()
    generate_data_monitor = SampleFromP(n_samples=100)

    trainer = DSGNHTTrainer(
        batch_size=batch_size,
        n_samples=n_samples,
        n_steps_simu=n_steps_simu,
        n_steps_optm=n_steps_optm,
        learning_rate_p=lr_p,
        learning_rate_q=lr_q,
        lr_decay=args.lrdecay,
        momentum_decay=momentum_decay,
        dataset=dataset,
        model=model,
        termination=EarlyStopping(lookahead=lookahead, min_epochs=10,
                                  max_epochs=999999),
        epoch_monitors=[
            dlog_model_params_monitor,
            generate_data_monitor,
            MonitorLL(name="valiset", data=valiset, n_samples=n_samples_epoch),
        ],
        final_monitors=[
            dlog_model_params_monitor,
            generate_data_monitor,
            MonitorLL(name="final-testset", data=testset,
                      n_samples=n_samples_final, level=logging.CRITICAL),
        ],
        posterior_mean_samples=postsamples,
        posterior_mean_monitor=MonitorPosteriorMean(),
    )

    experiment = Experiment()
    experiment.set_trainer(trainer)
    experiment.setup_output_dir(expname)
    experiment.setup_logging()
    experiment.print_summary()

    if args.cont is None:
        logger.info("Starting experiment ...")
        experiment.run_experiment()
    else:
        logger.info("Continuing experiment %s ..." % args.cont)
        experiment.continue_experiment(args.cont + "/results.h5", row=-1)

    logger.info("Finished. Writing metadata")
    experiment.print_summary()
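# --------------------------------------------------------------------------
# Illustration only (not part of the original script): how param_tag and the
# tag list above compose the experiment name.  The argument values below are
# hypothetical (lrdecay=1.0 and shuffling disabled, so neither tag appears).
#
#   param_tag(0.001) -> "13"    param_tag(0.005) -> "53"    param_tag(0.0) -> "00"
#
# With lr_p = lr_q = 0.001, samples=5, batchsize=25, n_simu=20, n_optm=1,
# momentum_decay=0.9, lookahead=10 and layers=1, the sorted tags are
#   ["bs25", "lah10", "lp13", "lq13", "md91", "no1", "ns20", "spl5"]
# and the resulting output directory name is
#   "dsgnht-bs25-lah10-lp13-lq13-md91-no1-ns20-spl5-1layer"
# --------------------------------------------------------------------------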
def run_experiment(args):
    # set environment variable for theano
    os.environ['THEANO_FLAGS'] = "device=gpu" + str(args.gpu)

    from learning.experiment import Experiment
    from learning.training import DSGNHTTrainer
    from learning.termination import EarlyStopping
    from learning.monitor import MonitorLL, DLogModelParams, SampleFromP, MonitorPosteriorMean
    from learning.dataset import MNIST
    from learning.preproc import PermuteColumns, Binarize
    from learning.models.dgm import DGMLayerStack
    from learning.models.sbn import SBN, SBNTop
    from learning.models.dsbn import DSBN
    from learning.models.darn import DARN, DARNTop
    from learning.models.nade import NADE, NADETop

    np.set_printoptions(precision=2)

    logger.debug("Arguments %s" % args)
    tags = []

    np.random.seed(23)

    # Layer models
    layer_models = {
        "sbn": (SBN, SBNTop),
        "dsbn": (DSBN, SBNTop),
        "darn": (DARN, DARNTop),
        "nade": (NADE, NADETop),
    }

    assert args.p_model in layer_models
    assert args.q_model in layer_models

    p_layer, p_top = layer_models[args.p_model]
    q_layer, q_top = layer_models[args.q_model]

    # n_samples to evaluate model
    n_samples_epoch = [1, 5, 25, 100]
    n_samples_final = [1, 5, 10, 25, 100, 500, 1000, 10000, 100000]

    if (args.p_model in ['darn', 'nade']) or (args.q_model in ['darn', 'nade']):
        n_samples_epoch = [1, 5, 25]
        n_samples_final = [1, 5, 10, 25, 100, 500]

    # no. posterior samples for posterior mean
    postsamples = [int(s) for s in args.postsamples.split(",")]

    # Layer sizes
    layer_sizes = [int(s) for s in args.layer_sizes.split(",")]

    n_X = 28 * 28

    p_layers = []
    q_layers = []
    for ls in layer_sizes:
        n_Y = ls
        p_layers.append(p_layer(n_X=n_X, n_Y=n_Y))
        q_layers.append(q_layer(n_X=n_Y, n_Y=n_X))
        n_X = n_Y
    p_layers.append(p_top(n_X=n_X))

    model = DGMLayerStack(p_layers=p_layers, q_layers=q_layers)
    model.setup()

    # parameters
    def param_tag(value):
        """ Convert a float into a short tag-usable string representation.
        E.g.:  0.1   -> 11
               0.01  -> 12
               0.001 -> 13
               0.005 -> 53
        """
        if value == 0.0:
            return "00"
        exp = np.floor(np.log10(value))
        leading = ("%e" % value)[0]
        return "%s%d" % (leading, -exp)

    # Learning rates
    lr_p = args.lr_p
    tags += ["lp" + param_tag(lr_p)]

    lr_q = args.lr_q
    tags += ["lq" + param_tag(lr_q)]

    # LR decay
    if args.lrdecay != 1.0:
        tags += ["lrdecay" + param_tag(args.lrdecay - 1.)]

    # Samples
    n_samples = args.samples
    tags += ["spl%d" % n_samples]

    # Batch size
    batch_size = args.batchsize
    tags += ["bs%d" % batch_size]

    # n_steps_simu
    n_steps_simu = args.n_simu
    tags += ["ns%d" % n_steps_simu]

    # n_steps_optm
    n_steps_optm = args.n_optm
    tags += ["no%d" % n_steps_optm]

    # momentum_decay
    momentum_decay = args.momentum_decay
    tags += ["md" + param_tag(momentum_decay)]

    # Dataset
    if args.shuffle:
        np.random.seed(23)
        preproc = [PermuteColumns()]
        tags += ["shuffle"]
    else:
        preproc = []

    if args.rebinarize:
        binarize_preproc = preproc + [Binarize(late=True)]
        dataset = MNIST(which_set='train', preproc=binarize_preproc,
                        n_datapoints=50000)
        valiset = MNIST(which_set='valid', preproc=binarize_preproc,
                        n_datapoints=10000)
        testset = MNIST(which_set='test', preproc=binarize_preproc,
                        n_datapoints=10000)
        # testset = MNIST(fname="mnist_salakhutdinov.pkl.gz", which_set='test',
        #                 preproc=preproc, n_datapoints=10000)
        tags += ["rb"]
    else:
        dataset = MNIST(fname="mnist_salakhutdinov.pkl.gz", which_set='train',
                        preproc=preproc, n_datapoints=50000)
        valiset = MNIST(fname="mnist_salakhutdinov.pkl.gz", which_set='valid',
                        preproc=preproc, n_datapoints=10000)
        testset = MNIST(fname="mnist_salakhutdinov.pkl.gz", which_set='test',
                        preproc=preproc, n_datapoints=10000)

    # lookahead
    lookahead = args.lookahead
    tags += ["lah%d" % lookahead]

    tags.sort()
    expname = "dsgnht-%s-%s-%s-%s" % ("-".join(tags), args.p_model, args.q_model,
                                      "-".join([str(s) for s in layer_sizes]))
    if args.report:
        expname = "report/" + expname

    logger.info("Running %s" % expname)

    #-----------------------------------------------------------------------------
    dlog_model_params_monitor = DLogModelParams()
    generate_data_monitor = SampleFromP(n_samples=100)

    trainer = DSGNHTTrainer(
        batch_size=batch_size,
        n_samples=n_samples,
        n_steps_simu=n_steps_simu,
        n_steps_optm=n_steps_optm,
        learning_rate_p=lr_p,
        learning_rate_q=lr_q,
        lr_decay=args.lrdecay,
        momentum_decay=momentum_decay,
        dataset=dataset,
        model=model,
        termination=EarlyStopping(lookahead=lookahead, min_epochs=10,
                                  max_epochs=999999),
        epoch_monitors=[
            dlog_model_params_monitor,
            generate_data_monitor,
            MonitorLL(name="valiset", data=valiset, n_samples=n_samples_epoch),
        ],
        final_monitors=[
            dlog_model_params_monitor,
            generate_data_monitor,
            MonitorLL(name="final-testset", data=testset,
                      n_samples=n_samples_final, level=logging.CRITICAL),
        ],
        posterior_mean_samples=postsamples,
        posterior_mean_monitor=MonitorPosteriorMean(),
    )

    experiment = Experiment()
    experiment.set_trainer(trainer)
    experiment.setup_output_dir(expname)
    experiment.setup_logging()
    experiment.print_summary()

    if args.cont is None:
        logger.info("Starting experiment ...")
        experiment.run_experiment()
    else:
        logger.info("Continuing experiment %s ..." % args.cont)
        experiment.continue_experiment(args.cont + "/results.h5", row=-1)

    logger.info("Finished. Writing metadata")
    experiment.print_summary()
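# --------------------------------------------------------------------------
# Hypothetical sketch (not taken from the original repository): a minimal
# argparse setup that supplies the attributes the MNIST run_experiment above
# reads from `args`.  Only the attribute names (args.gpu, args.lr_p, ...)
# come from the code; the flag spellings, defaults, and help strings are
# assumptions.
# --------------------------------------------------------------------------
import argparse

def parse_args():
    parser = argparse.ArgumentParser(description="DSGNHT training run")
    parser.add_argument("--gpu", type=int, default=0, help="GPU device number")
    parser.add_argument("--p_model", default="sbn", choices=["sbn", "dsbn", "darn", "nade"])
    parser.add_argument("--q_model", default="sbn", choices=["sbn", "dsbn", "darn", "nade"])
    parser.add_argument("--layer_sizes", default="200,200", help="comma-separated latent layer sizes")
    parser.add_argument("--lr_p", type=float, default=1e-3, help="learning rate for the P model")
    parser.add_argument("--lr_q", type=float, default=1e-3, help="learning rate for the Q model")
    parser.add_argument("--lrdecay", type=float, default=1.0)
    parser.add_argument("--samples", type=int, default=5, help="samples per datapoint during training")
    parser.add_argument("--batchsize", type=int, default=25)
    parser.add_argument("--n_simu", type=int, default=1, help="simulation steps per update")
    parser.add_argument("--n_optm", type=int, default=1, help="optimization steps per update")
    parser.add_argument("--momentum_decay", type=float, default=0.9)
    parser.add_argument("--postsamples", default="10,100", help="comma-separated posterior-mean sample counts")
    parser.add_argument("--lookahead", type=int, default=10, help="early-stopping lookahead (epochs)")
    parser.add_argument("--shuffle", action="store_true")
    parser.add_argument("--rebinarize", action="store_true")
    parser.add_argument("--report", action="store_true")
    parser.add_argument("--cont", default=None, help="experiment directory to continue from")
    return parser.parse_args()

# Typical use:  run_experiment(parse_args())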
            n_hid=200,
        ),
    ]

    model = LayerStack(
        p_layers=p_layers,
        q_layers=q_layers,
    )

    trainer = Trainer(
        n_samples=5,
        learning_rate_p=1e-3,
        learning_rate_q=1e-3,
        learning_rate_s=1e-3,
        layer_discount=1.0,
        batch_size=25,
        dataset=dataset,
        model=model,
        termination=EarlyStopping(),
        #step_monitors=[MonitorLL(data=smallset, n_samples=[1, 5, 25, 100])],
        epoch_monitors=[
            MonitorLL(data=valiset, n_samples=[100]),
            DLogModelParams(),
            SampleFromP(n_samples=100)
        ],
        final_monitors=[
            MonitorLL(data=testset, n_samples=[1, 5, 10, 25, 100, 500])
        ],
        monitor_nth_step=100,
    )
    trainer = Trainer(
        n_samples=5,
        learning_rate_p=3e-2,
        learning_rate_q=3e-2,
        learning_rate_s=3e-2,
        layer_discount=1.00,
        batch_size=25,
        dataset=dataset,
        model=model,
        termination=EarlyStopping(),
        #monitor_nth_step=100,
        #step_monitors=[
        #    MonitorLL(name="valiset", data=valiset, n_samples=[1, 5, 25, 100])
        #],
        epoch_monitors=[
            DLogModelParams(),
            MonitorLL(name="valiset", data=valiset, n_samples=[1, 5, 25, 100]),
            MonitorLL(name="testset", data=testset, n_samples=[1, 5, 25, 100]),
        ],
        final_monitors=[
            MonitorLL(name="final-valiset", data=valiset, n_samples=[1, 5, 25, 100]),
            MonitorLL(name="final-testset", data=testset, n_samples=[1, 5, 25, 100]),
            SampleFromP(data=valiset, n_samples=100),
        ],
    )
def rerun_monitors(args):
    from learning.utils.datalog import dlog, StoreToH5, TextPrinter

    from learning.experiment import Experiment
    from learning.monitor import MonitorLL, DLogModelParams, SampleFromP
    from learning.monitor.llbound import LLBound
    from learning.dataset import MNIST
    from learning.preproc import PermuteColumns

    from learning.rws import LayerStack
    from learning.sbn import SBN, SBNTop
    from learning.darn import DARN, DARNTop
    from learning.nade import NADE, NADETop

    import h5py

    logger.debug("Arguments %s" % args)
    tags = []

    # Layer models
    layer_models = {
        "sbn": (SBN, SBNTop),
        "darn": (DARN, DARNTop),
        "nade": (NADE, NADETop),
    }

    if args.p_model not in layer_models:
        raise ValueError("Unknown P-layer model %s" % args.p_model)
    p_layer, p_top = layer_models[args.p_model]

    if args.q_model not in layer_models:
        raise ValueError("Unknown Q-layer model %s" % args.q_model)
    q_layer, q_top = layer_models[args.q_model]

    # Layer sizes
    layer_sizes = [int(s) for s in args.layer_sizes.split(",")]

    n_X = 28 * 28

    p_layers = []
    q_layers = []
    for ls in layer_sizes:
        n_Y = ls
        p_layers.append(p_layer(n_X=n_X, n_Y=n_Y, clamp_sigmoid=True))
        q_layers.append(q_layer(n_X=n_Y, n_Y=n_X))
        n_X = n_Y
    p_layers.append(p_top(n_X=n_X, clamp_sigmoid=True))

    model = LayerStack(p_layers=p_layers, q_layers=q_layers)
    model.setup()

    # Dataset
    if args.shuffle:
        np.random.seed(23)
        preproc = [PermuteColumns()]
        tags += ["shuffle"]
    else:
        preproc = []

    tags.sort()
    expname = args.cont
    if expname[-1] == "/":
        expname = expname[:-1]

    logger.info("Loading dataset...")
    testset = MNIST(fname="mnist_salakhutdinov.pkl.gz", which_set='test',
                    preproc=preproc, n_datapoints=10000)

    #-----------------------------------------------------------------------------
    logger.info("Setting up monitors...")
    #monitors = [MonitorLL(data=testset, n_samples=[1, 5, 10, 25, 100, 500, 1000, 10000, 100000])]
    #monitors = [MonitorLL(data=testset, n_samples=[1, 5, 10, 25, 100, 500, 1000]),
    #            LLBound(data=testset, n_samples=[1, 5, 10, 25, 100, 500, 1000])]
    monitors = [
        MonitorLL(data=testset, n_samples=[1, 5, 10, 25, 100]),
        LLBound(data=testset, n_samples=[1, 5, 10, 25, 100])
    ]
    #monitors = [BootstrapLL(data=testset, n_samples=[1, 5, 10, 25, 100, 500, 1000])]
    #monitors = [MonitorLL(data=testset, n_samples=[500,])]
    #monitors = [SampleFromP(n_samples=200)]

    #-----------------------------------------------------------------------------
    result_dir = "reruns/%s" % os.path.basename(expname)
    results_fname = result_dir + "/results.h5"
    logger.info("Output logging to %s" % result_dir)
    os.makedirs(result_dir)
    dlog.set_handler("*", StoreToH5, results_fname)

    fname = args.cont + "/results.h5"
    logger.info("Loading from %s" % fname)
    with h5py.File(fname, "r") as h5:
        # Find a validation LL to report...
        LL_dataset = find_LL_channel(h5)
        LL = h5[LL_dataset][:]

        best_rows = list(np.argsort(-LL)[:args.best])

        logger.info("Read LL from '%s'" % LL_dataset)
        logger.info("Final validation LL: %5.2f (iteration %d)" % (LL[-1], LL.shape[0]))
        for row in best_rows:
            logger.info("  validation LL: %5.2f (iteration %d)" % (LL[row], row))

        for m in monitors:
            m.on_init(model)

        rows = [-1] + best_rows
        for row in rows:
            logger.info("Loading model (row %d)..." % row)
            logger.info("LL on validation set: %5.2f" % LL[row])
            model.model_params_from_h5(h5, row=row)
            run_monitors(model, monitors)

    logger.info("Finished.")
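# --------------------------------------------------------------------------
# Hypothetical sketch (not part of the original excerpt): rerun_monitors
# relies on a find_LL_channel() helper and a run_monitors() function defined
# elsewhere in the repository.  A minimal find_LL_channel could look like the
# following, assuming the validation log-likelihood trace is stored in
# results.h5 under a dataset whose path mentions the "valiset" monitor and
# ends in "LL"; the real helper may locate the channel differently.
# --------------------------------------------------------------------------
import h5py

def find_LL_channel(h5):
    """Return the name of a dataset holding per-epoch validation LL values."""
    candidates = []

    def visit(name, obj):
        # collect dataset names that look like per-epoch validation LL traces
        if isinstance(obj, h5py.Dataset) and "valiset" in name and name.endswith("LL"):
            candidates.append(name)

    h5.visititems(visit)
    if not candidates:
        raise KeyError("no validation LL dataset found in results file")
    return candidates[0]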