def train(
    out_path=None, name='', model_to_load=None, save_images=True,
    test_every=None, dim_h=None, rec_args=None, gen_args=None,
    prior='gaussian', preprocessing=None, learning_args=None,
    dataset_args=None):

    # ========================================================================
    if preprocessing is None: preprocessing = []
    if learning_args is None: learning_args = dict()
    if dataset_args is None: raise ValueError('Dataset args must be provided')

    learning_args = init_learning_args(**learning_args)
    print 'Dataset args: %s' % pprint.pformat(dataset_args)
    print 'Learning args: %s' % pprint.pformat(learning_args)

    # ========================================================================
    print_section('Setting up data')
    batch_size = learning_args.pop('batch_size')
    valid_batch_size = learning_args.pop('valid_batch_size')
    train, valid, test = load_data(
        train_batch_size=batch_size,
        valid_batch_size=valid_batch_size,
        **dataset_args)

    # ========================================================================
    print_section('Setting model and variables')
    dim_in = train.dims[train.name]
    X = T.matrix('x', dtype=floatX)
    X.tag.test_value = np.zeros((batch_size, dim_in), dtype=X.dtype)
    trng = get_trng()

    preproc = Preprocessor(preprocessing)
    X_i = preproc(X, data_iter=train)
    inps = [X]

    # ========================================================================
    print_section('Loading model and forming graph')

    def create_model():
        model = Helmholtz.factory(
            dim_h, train, prior=prior, rec_args=rec_args, gen_args=gen_args)
        models = OrderedDict()
        models[model.name] = model
        return models

    models = set_model(create_model, model_to_load, unpack)
    model = next((v for k, v in models.iteritems()
                  if k in ['sbn', 'gbn', 'lbn']), None)
    posterior = model.posterior
    if not posterior.distribution.is_continuous:
        raise ValueError('Cannot perform VAE with posterior with distribution '
                         '%r' % type(posterior.distribution))
    tparams = model.set_tparams()
    print_profile(tparams)

    # ========================================================================
    print_section('Getting cost')
    constants = []
    updates = theano.OrderedUpdates()
    n_posterior_samples = learning_args.pop('n_posterior_samples')
    reweight = learning_args.pop('reweight')
    results, samples, constants, updates = model(
        X_i, X, qk=None, pass_gradients=True,
        n_posterior_samples=n_posterior_samples, reweight=reweight)
    cost = results['cost']
    extra_outs = []
    extra_outs_keys = ['cost']

    l2_decay = learning_args.pop('l2_decay')
    if l2_decay is not False and l2_decay > 0.:
        print 'Adding %.5f L2 weight decay' % l2_decay
        l2_rval = model.l2_decay(l2_decay)
        cost += l2_rval.pop('cost')
        extra_outs += l2_rval.values()
        extra_outs_keys += l2_rval.keys()

    # ========================================================================
    print_section('Test functions')
    f_test_keys = results.keys()
    f_test = theano.function([X], results.values())

    prior_samples, p_updates = model.sample_from_prior()
    f_prior = theano.function([], prior_samples, updates=p_updates)

    latent_vis = model.visualize_latents()
    f_latent = theano.function([], latent_vis)

    py = samples['py']
    f_py_h = theano.function([X], py)

    # ========================================================================
    print_section('Setting final tparams and save function')
    excludes = learning_args.pop('excludes')
    tparams, all_params = set_params(tparams, updates, excludes=excludes)

    def save(tparams, outfile):
        d = dict((k, v.get_value()) for k, v in all_params.items())
        d.update(
            dim_h=dim_h,
            rec_args=rec_args,
            gen_args=gen_args
        )
        np.savez(outfile, **d)

    def save_images():
        p_samples = f_prior()
        p_samples = p_samples.reshape(
            (p_samples.shape[0] // 10, 10, p_samples.shape[1]))
        train.save_images(p_samples, path.join(out_path, 'prior_samples.png'))

        l_vis = f_latent()
        l_vis = l_vis.reshape((l_vis.shape[0] // 10, 10, l_vis.shape[1]))
        train.save_images(l_vis, path.join(out_path, 'latent_vis.png'))

        py_h = f_py_h(train.X[:100])
        train.save_images(py_h, path.join(out_path, 'py_h.png'))

    # ========================================================================
    print_section('Getting gradients and building optimizer.')
    f_grad_shared, f_grad_updates, learning_args = set_optimizer(
        inps, cost, tparams, constants, updates, extra_outs, **learning_args)

    # ========================================================================
    print_section('Actually running (main loop)')
    monitor = SimpleMonitor()
    main_loop(
        train, valid, tparams,
        f_grad_shared, f_grad_updates, f_test, f_test_keys,
        test_every=test_every,
        save=save,
        save_images=save_images,
        monitor=monitor,
        out_path=out_path,
        name=name,
        extra_outs_keys=extra_outs_keys,
        **learning_args)
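# ----------------------------------------------------------------------------
# Hypothetical usage sketch (not from the original source): one way this VAE
# `train` entry point might be invoked. Every value below -- output path,
# hidden size, dataset spec -- is an illustrative assumption. The learning_args
# keys mirror the ones the function pops above (batch_size, valid_batch_size,
# n_posterior_samples, reweight, l2_decay, excludes); dataset_args is
# forwarded verbatim to `load_data`.
#
# train(
#     out_path='outs/vae',                 # assumed output directory
#     name='vae_example',
#     dim_h=200,                           # assumed latent dimensionality
#     prior='gaussian',
#     learning_args=dict(
#         batch_size=100,
#         valid_batch_size=100,
#         n_posterior_samples=10,
#         reweight=False,
#         l2_decay=0.0002,
#         excludes=[]),
#     dataset_args=dict(dataset='mnist'))  # assumed dataset spec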
def train(
    out_path=None, name='', model_to_load=None, save_images=True,
    test_every=None, dim_h=None, preprocessing=None, learning_args=None,
    inference_args=None, dataset_args=None):

    # ========================================================================
    if preprocessing is None: preprocessing = []
    if learning_args is None: learning_args = dict()
    if inference_args is None: inference_args = dict()
    if dataset_args is None: raise ValueError('Dataset args must be provided')

    learning_args = init_learning_args(**learning_args)
    inference_args = init_inference_args(**inference_args)
    print 'Dataset args: %s' % pprint.pformat(dataset_args)
    print 'Learning args: %s' % pprint.pformat(learning_args)
    print 'Inference args: %s' % pprint.pformat(inference_args)

    # ========================================================================
    print_section('Setting up data')
    batch_size = learning_args.pop('batch_size')
    valid_batch_size = learning_args.pop('valid_batch_size')
    train, valid, test = load_data(
        train_batch_size=batch_size,
        valid_batch_size=valid_batch_size,
        **dataset_args)

    # ========================================================================
    print_section('Setting model and variables')
    dim_in = train.dims[train.name]
    X = T.matrix('x', dtype=floatX)
    X.tag.test_value = np.zeros((batch_size, dim_in), dtype=X.dtype)
    trng = get_trng()

    preproc = Preprocessor(preprocessing)
    X_i = preproc(X, data_iter=train)
    inps = [X]

    # ========================================================================
    print_section('Loading model and forming graph')

    def create_model():
        model = RBM(dim_in, dim_h, v_dist=train.distributions[train.name],
                    mean_image=train.mean_image)
        models = OrderedDict()
        models[model.name] = model
        return models

    models = set_model(create_model, model_to_load, unpack)
    model = models['rbm']
    tparams = model.set_tparams()
    print_profile(tparams)

    # ========================================================================
    print_section('Getting cost')
    persistent = inference_args.pop('persistent')
    if persistent:
        # Persistent chains: keep the negative-phase hidden state in a shared
        # variable so it carries over between parameter updates.
        H_p = theano.shared(
            np.zeros((inference_args['n_chains'],
                      model.h_dist.dim)).astype(floatX),
            name='h_p')
    else:
        H_p = None
    results, samples, updates, constants = model(
        X_i, h_p=H_p, **inference_args)

    updates = theano.OrderedUpdates()
    if persistent:
        updates += theano.OrderedUpdates([(H_p, samples['hs'][-1])])

    cost = results['cost']
    extra_outs = [results['free_energy']]
    extra_outs_keys = ['cost', 'free_energy']

    # ========================================================================
    print_section('Test functions')
    f_test_keys = results.keys()
    f_test = theano.function([X], results.values())

    try:
        _, z_updates = model.update_partition_function(K=1000)
        f_update_partition = theano.function([], [], updates=z_updates)
    except NotImplementedError:
        f_update_partition = None

    # Short sampling chain from random hidden states, used to visualize what
    # the model generates.
    H0 = model.trng.binomial(size=(10, model.h_dist.dim), dtype=floatX)
    s_outs, s_updates = model.sample(H0, n_steps=100)
    f_chain = theano.function(
        [], model.v_dist.get_center(s_outs['pvs']), updates=s_updates)

    # ========================================================================
    print_section('Setting final tparams and save function')
    excludes = learning_args.pop('excludes')
    tparams, all_params = set_params(tparams, updates, excludes=excludes)

    def save(tparams, outfile):
        d = dict((k, v.get_value()) for k, v in all_params.items())
        d.update(
            dim_in=dim_in,
            dim_h=dim_h
        )
        np.savez(outfile, **d)

    def save_images():
        w = model.W.get_value().T
        w = w.reshape((10, w.shape[0] // 10, w.shape[1]))
        train.save_images(w, path.join(out_path, 'weights.png'))

        chain = f_chain()
        train.save_images(chain, path.join(out_path, 'chain.png'))

    # ========================================================================
    print_section('Getting gradients and building optimizer.')
    f_grad_shared, f_grad_updates, learning_args = set_optimizer(
        [X], cost, tparams, constants, updates, extra_outs, **learning_args)

    # ========================================================================
    print_section('Actually running (main loop)')
    monitor = SimpleMonitor()
    main_loop(
        train, valid, tparams,
        f_grad_shared, f_grad_updates, f_test, f_test_keys,
        f_extra=f_update_partition,
        test_every=test_every,
        save=save,
        save_images=save_images,
        monitor=monitor,
        out_path=out_path,
        name=name,
        extra_outs_keys=extra_outs_keys,
        **learning_args)
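# ----------------------------------------------------------------------------
# Hypothetical usage sketch (not from the original source). With
# persistent=True, the script allocates the shared chain state H_p of shape
# (n_chains, dim_h) and carries the last hidden sample forward between updates
# (persistent contrastive divergence); with persistent=False it runs plain CD
# from the data. All values below are illustrative assumptions; the
# inference_args keys shown (persistent, n_chains) are the ones the function
# demonstrably reads above.
#
# train(
#     out_path='outs/rbm',
#     name='rbm_example',
#     dim_h=500,                      # assumed number of hidden units
#     learning_args=dict(
#         batch_size=100,
#         valid_batch_size=100,
#         excludes=[]),
#     inference_args=dict(
#         persistent=True,            # use persistent chains (PCD)
#         n_chains=100),              # size of the persistent chain state
#     dataset_args=dict(dataset='mnist'))  # assumed dataset spec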
def train(
    out_path=None, name='', model_to_load=None, test_every=None,
    classifier=None, preprocessing=None, learning_args=None,
    dataset_args=None):
    '''Basic training script.

    Args:
        out_path: str, path for output directory.
        name: str, name of experiment.
        model_to_load: str (optional), path of a saved model to load.
        test_every: int (optional), if not None, test every n epochs
            instead of every 1 epoch.
        classifier: dict, kwargs for MLP factory.
        preprocessing: list (optional), preprocessing steps applied to the
            input before training.
        learning_args: dict or None, see `init_learning_args` above for
            options.
        dataset_args: dict, arguments for Dataset class.

    '''
    # ========================================================================
    if preprocessing is None: preprocessing = []
    if learning_args is None: learning_args = dict()
    if dataset_args is None: raise ValueError('Dataset args must be provided')

    learning_args = init_learning_args(**learning_args)
    print 'Dataset args: %s' % pprint.pformat(dataset_args)
    print 'Learning args: %s' % pprint.pformat(learning_args)

    # ========================================================================
    print_section('Setting up data')
    input_keys = dataset_args.pop('keys')
    batch_size = learning_args.pop('batch_size')
    valid_batch_size = learning_args.pop('valid_batch_size')
    train, valid, test = load_data(
        train_batch_size=batch_size,
        valid_batch_size=valid_batch_size,
        **dataset_args)

    # ========================================================================
    print_section('Setting model and variables')
    dim_in = train.dims[input_keys[0]]
    dim_out = train.dims[input_keys[1]]
    X = T.matrix('x', dtype=floatX)  # Input data
    Y = T.matrix('y', dtype=floatX)  # Labels
    X.tag.test_value = np.zeros((batch_size, dim_in), dtype=X.dtype)
    Y.tag.test_value = np.zeros((batch_size, dim_out), dtype=X.dtype)
    trng = get_trng()

    preproc = Preprocessor(preprocessing)
    X_i = preproc(X, data_iter=train)
    inps = [X, Y]

    # ========================================================================
    print_section('Loading model and forming graph')
    dropout = learning_args.pop('dropout')

    def create_model():
        model = MLP.factory(
            dim_in=dim_in, dim_out=dim_out,
            distribution=train.distributions[input_keys[1]],
            dropout=dropout, **classifier)
        models = OrderedDict()
        models[model.name] = model
        return models

    def unpack(dim_in=None, dim_out=None, mlp=None, **model_args):
        model = MLP.factory(dim_in=dim_in, dim_out=dim_out, **mlp)
        models = [model]
        return models, model_args, None

    models = set_model(create_model, model_to_load, unpack)
    model = models['MLP']
    tparams = model.set_tparams()
    print_profile(tparams)

    # ========================================================================
    print_section('Getting cost')
    outs = model(X_i)
    p = outs['p']
    base_cost = model.neg_log_prob(Y, p).sum(axis=0)
    cost = base_cost

    updates = theano.OrderedUpdates()

    l2_decay = learning_args.pop('l2_decay')
    if l2_decay > 0.:
        print 'Adding %.5f L2 weight decay' % l2_decay
        l2_rval = model.l2_decay(l2_decay)
        l2_cost = l2_rval.pop('cost')
        cost += l2_cost

    constants = []
    extra_outs = []
    extra_outs_keys = ['cost']

    # ========================================================================
    print_section('Test functions')
    error = (Y * (1 - p)).sum(axis=1).mean()
    f_test_keys = ['error', 'cost']
    f_test_vals = [error, base_cost]
    if l2_decay > 0.:
        f_test_keys.append('L2 cost')
        f_test_vals.append(l2_cost)
    f_test = theano.function([X, Y], f_test_vals)

    # ========================================================================
    print_section('Setting final tparams and save function')
    tparams, all_params = set_params(tparams, updates)

    def save(tparams, outfile):
        d = dict((k, v.get_value()) for k, v in all_params.items())
        d.update(
            dim_in=dim_in,
            dim_out=dim_out,
            mlp=classifier
        )
        np.savez(outfile, **d)

    # ========================================================================
    print_section('Getting gradients and building optimizer.')
    f_grad_shared, f_grad_updates, learning_args = set_optimizer(
        inps, cost, tparams, constants, updates, extra_outs, **learning_args)

    # ========================================================================
    print_section('Actually running (main loop)')
    monitor = SimpleMonitor()
    main_loop(
        train, valid, tparams,
        f_grad_shared, f_grad_updates, f_test, f_test_keys,
        input_keys=input_keys,
        test_every=test_every,
        save=save,
        monitor=monitor,
        out_path=out_path,
        name=name,
        extra_outs_keys=extra_outs_keys,
        **learning_args)
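# ----------------------------------------------------------------------------
# Hypothetical usage sketch (not from the original source). `classifier` is
# forwarded to `MLP.factory`, and dataset_args must carry a 'keys' pair naming
# the input and label variables (popped above as input_keys). The classifier
# kwargs, key names, and rates below are all illustrative assumptions, not
# documented options.
#
# train(
#     out_path='outs/mlp',
#     name='mlp_example',
#     classifier=dict(dim_hs=[500]),   # assumed MLP.factory kwargs
#     learning_args=dict(
#         batch_size=100,
#         valid_batch_size=100,
#         dropout=0.5,
#         l2_decay=0.0002),
#     dataset_args=dict(
#         dataset='mnist',
#         keys=['input', 'labels']))   # assumed variable names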
def train(
    out_path=None, name='', model_to_load=None, save_images=True,
    test_every=None, dim_h=None, rec_args=None, gen_args=None,
    prior='gaussian', preprocessing=None, learning_args=None,
    dataset_args=None):

    # ========================================================================
    if preprocessing is None: preprocessing = []
    if learning_args is None: learning_args = dict()
    if dataset_args is None: raise ValueError('Dataset args must be provided')

    learning_args = init_learning_args(**learning_args)
    print 'Dataset args: %s' % pprint.pformat(dataset_args)
    print 'Learning args: %s' % pprint.pformat(learning_args)

    # ========================================================================
    print_section('Setting up data')
    batch_size = learning_args.pop('batch_size')
    valid_batch_size = learning_args.pop('valid_batch_size')
    dataset = dataset_args['dataset']
    dataset_class = resolve_dataset(dataset)
    train, valid, test, idx = load_data_split(
        dataset_class,
        train_batch_size=batch_size,
        valid_batch_size=valid_batch_size,
        **dataset_args)
    dataset_args['idx'] = idx

    # ========================================================================
    print_section('Setting model and variables')
    dim_in = train.dims[train.name]
    X = T.matrix('x', dtype=floatX)
    X.tag.test_value = np.zeros((batch_size, dim_in), dtype=X.dtype)
    trng = get_trng()

    preproc = Preprocessor(preprocessing)
    X_i = preproc(X, data_iter=train)
    inps = [X]

    # ========================================================================
    print_section('Loading model and forming graph')

    def create_model():
        model = Helmholtz.factory(
            dim_h, train, prior=prior, rec_args=rec_args, gen_args=gen_args)
        models = OrderedDict()
        models[model.name] = model
        return models

    models = set_model(create_model, model_to_load, unpack)
    model = next((v for k, v in models.iteritems()
                  if k in ['sbn', 'gbn', 'lbn', 'labn']), None)
    posterior = model.posterior
    if not posterior.distribution.is_continuous:
        raise ValueError('Cannot perform VAE with posterior with distribution '
                         '%r' % type(posterior.distribution))
    tparams = model.set_tparams()
    print_profile(tparams)

    # ========================================================================
    print_section('Getting cost')
    constants = []
    updates = theano.OrderedUpdates()
    n_posterior_samples = learning_args.pop('n_posterior_samples')
    results, samples, updates, constants = model(
        X_i, X, qk=None, pass_gradients=True,
        n_posterior_samples=n_posterior_samples)
    cost = results['cost']
    extra_outs = []
    extra_outs_keys = ['cost']

    l2_decay = learning_args.pop('l2_decay')
    if l2_decay is not False and l2_decay > 0.:
        print 'Adding %.5f L2 weight decay' % l2_decay
        l2_rval = model.l2_decay(l2_decay)
        cost += l2_rval.pop('cost')
        extra_outs += l2_rval.values()
        extra_outs_keys += l2_rval.keys()

    # ========================================================================
    print_section('Test functions')
    f_test_keys = results.keys()
    f_test = theano.function([X], results.values())

    prior_samples, p_updates = model.sample_from_prior()
    f_prior = theano.function(
        [], model.get_center(prior_samples), updates=p_updates)

    latent_vis = model.visualize_latents()
    f_latent = theano.function([], latent_vis)

    py = model.get_center(samples['py'])
    f_py_h = theano.function([X], py)

    # ========================================================================
    print_section('Setting final tparams and save function')
    excludes = learning_args.pop('excludes')
    tparams, all_params = set_params(tparams, updates, excludes=excludes)

    def save(tparams, outfile):
        d = dict((k, v.get_value()) for k, v in all_params.items())
        d.update(
            dim_h=dim_h,
            rec_args=rec_args,
            gen_args=gen_args
        )
        np.savez(outfile, **d)

    def save_images():
        p_samples = f_prior()
        train.save_images(p_samples, path.join(out_path, 'prior_samples.png'))

        l_vis = f_latent()
        train.save_images(l_vis, path.join(out_path, 'latent_vis.png'))

        py_h = f_py_h(train.X[:100])
        train.save_images(py_h, path.join(out_path, 'py_h.png'))

    # ========================================================================
    print_section('Getting gradients and building optimizer.')
    f_grad_shared, f_grad_updates, learning_args = set_optimizer(
        inps, cost, tparams, constants, updates, extra_outs, **learning_args)

    # ========================================================================
    print_section('Actually running (main loop)')
    monitor = SimpleMonitor()
    main_loop(
        train, valid, tparams,
        f_grad_shared, f_grad_updates, f_test, f_test_keys,
        test_every=test_every,
        save=save,
        save_images=save_images,
        monitor=monitor,
        out_path=out_path,
        name=name,
        extra_outs_keys=extra_outs_keys,
        **learning_args)
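# ----------------------------------------------------------------------------
# Hypothetical usage sketch (not from the original source). This variant
# resolves the dataset class itself and uses `load_data_split`, writing the
# returned split indices back into dataset_args['idx'] so a reloaded model can
# reproduce the same train/valid/test split. Values below are illustrative
# assumptions; only 'dataset' is demonstrably required in dataset_args.
#
# train(
#     out_path='outs/vae_split',
#     name='vae_split_example',
#     dim_h=200,                      # assumed latent dimensionality
#     prior='gaussian',
#     learning_args=dict(
#         batch_size=100,
#         valid_batch_size=100,
#         n_posterior_samples=10,
#         l2_decay=0.0002,
#         excludes=[]),
#     dataset_args=dict(dataset='mnist'))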