def main():
    parser = build_argparser()
    args = parser.parse_args()

    # Load experiment hyperparameters.
    try:
        hyperparams = smartutils.load_dict_from_json_file(
            pjoin(args.experiment, "hyperparams.json"))
    except:
        hyperparams = smartutils.load_dict_from_json_file(
            pjoin(args.experiment, '..', "hyperparams.json"))

    model = load_model(args.experiment)
    print(str(model))

    with Timer("Loading dataset"):
        trainset, validset, testset = datasets.load(hyperparams['dataset'],
                                                    keep_on_cpu=True)
        print(" (data: {:,}; {:,}; {:,}) ".format(len(trainset),
                                                  len(validset),
                                                  len(testset)),
              end="")

    # Result file.
    result_file = pjoin(args.experiment, "results_estimate.json")

    if not os.path.isfile(result_file) or args.force:
        with Timer("Evaluating NLL estimate"):
            results = {"seed": args.seed}
            results['trainset'] = estimate_NLL(model, trainset, seed=args.seed,
                                               batch_size=args.batch_size)
            results['validset'] = estimate_NLL(model, validset, seed=args.seed,
                                               batch_size=args.batch_size)
            results['testset'] = estimate_NLL(model, testset, seed=args.seed,
                                              batch_size=args.batch_size)
            utils.save_dict_to_json_file(result_file, {"NLL_estimate": results})
    else:
        print("Loading saved results... (use --force to re-run evaluation)")
        results = utils.load_dict_from_json_file(result_file)['NLL_estimate']

    for dataset in ['trainset', 'validset', 'testset']:
        print("NLL estimate on {}: {:.2f} ± {:.2f}".format(
            dataset, results[dataset]['mean'], results[dataset]['stderror']))
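
# NOTE: the script above reports, for each split, a mean NLL and its standard error as
# returned by `estimate_NLL` (defined elsewhere in the repo). The helper below is only an
# illustrative sketch of how such statistics are typically computed from per-example NLL
# estimates; `per_example_nlls` is a hypothetical input, not part of the repo's API.
import numpy as np


def summarize_nlls_sketch(per_example_nlls):
    """Mean NLL and standard error of the mean over a set of examples (illustrative only)."""
    nlls = np.asarray(per_example_nlls, dtype=np.float64)
    mean = nlls.mean()
    stderror = nlls.std(ddof=1) / np.sqrt(len(nlls))  # standard error of the mean
    return {"mean": float(mean), "stderror": float(stderror)}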
def main():
    parser = buildArgsParser()
    args = parser.parse_args()

    # Load experiment hyperparameters.
    try:
        hyperparams = smartutils.load_dict_from_json_file(
            pjoin(args.experiment, "hyperparams.json"))
    except:
        hyperparams = smartutils.load_dict_from_json_file(
            pjoin(args.experiment, '..', "hyperparams.json"))

    model = load_model(args.experiment)
    print(str(model))

    with Timer("Generating {} samples from Conv Deep NADE".format(args.count)):
        sample = model.build_sampling_function(seed=args.seed)
        samples, probs = sample(args.count, return_probs=True, ordering_seed=args.seed)

    if args.out is not None:
        outfile = pjoin(args.experiment, args.out)
        with Timer("Saving {0} samples to '{1}'".format(args.count, outfile)):
            np.save(outfile, samples)

    if args.view:
        import pylab as plt
        from convnade import vizu

        if hyperparams["dataset"] == "binarized_mnist":
            image_shape = (28, 28)
        else:
            raise ValueError("Unknown dataset: {0}".format(hyperparams["dataset"]))

        plt.figure()
        data = vizu.concatenate_images(samples, shape=image_shape, border_size=1, clim=(0, 1))
        plt.imshow(data, cmap=plt.cm.gray, interpolation='nearest')
        plt.title("Samples")

        plt.figure()
        data = vizu.concatenate_images(probs, shape=image_shape, border_size=1, clim=(0, 1))
        plt.imshow(data, cmap=plt.cm.gray, interpolation='nearest')
        plt.title("Probs")

        plt.show()
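
# NOTE: `build_sampling_function` belongs to the ConvNADE model; the sketch below only
# illustrates the NADE-style ancestral sampling it performs, i.e. sampling one pixel at a
# time along an ordering, conditioned on the pixels already sampled. The name
# `conditional_probs` is a hypothetical callable, not the repo's API.
import numpy as np


def ancestral_sample_sketch(conditional_probs, D, rng):
    """Illustrative sampler: `conditional_probs(x, mask)` is assumed to return
    p(x_d = 1 | observed pixels) for every dimension d."""
    x = np.zeros(D, dtype=np.float32)
    mask = np.zeros(D, dtype=np.float32)
    ordering = rng.permutation(D)
    for d in ordering:
        p = conditional_probs(x, mask)[d]  # conditional for the next pixel
        x[d] = rng.rand() < p              # sample it
        mask[d] = 1.0                      # mark it as observed
    return x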
def load_model(experiment_path):
    with Timer("Loading model"):
        from convnade import DeepConvNadeUsingLasagne, DeepConvNadeWithResidualUsingLasagne
        from convnade import DeepConvNADE, DeepConvNADEWithResidual

        # Try each known model class until one successfully loads the experiment.
        for model_class in [DeepConvNadeUsingLasagne, DeepConvNadeWithResidualUsingLasagne,
                            DeepConvNADE, DeepConvNADEWithResidual]:
            try:
                model = model_class.create(experiment_path)
                return model
            except Exception as e:
                print(e)

    raise NameError("No model found!")
def main():
    parser = build_argparser()
    args = parser.parse_args()

    # Get experiment folder
    experiment_path = args.name
    if not os.path.isdir(experiment_path):
        # If not a directory, it must be the name of the experiment.
        experiment_path = pjoin(".", "experiments", args.name)

    if not os.path.isdir(experiment_path):
        parser.error('Cannot find experiment: {0}!'.format(args.name))

    if not os.path.isdir(pjoin(experiment_path, "evaluation")):
        parser.error('Cannot find evaluations for experiment: {0}!'.format(experiment_path))

    results_file = pjoin(experiment_path, "results.json")

    if not os.path.isfile(results_file) or args.force:
        with Timer("Merging NLL evaluations"):
            results = merge_NLL_evaluations(evaluation_dir=pjoin(experiment_path, "evaluation"))
            smartutils.save_dict_to_json_file(results_file, {"NLL": results})
    else:
        print("Loading saved losses... (use --force to re-run evaluation)")
        results = smartutils.load_dict_from_json_file(results_file)["NLL"]

    nb_orderings = results['nb_orderings']
    for dataset in ['validset', 'testset']:
        print("NLL estimate on {} ({} orderings): {:.2f} ± {:.2f}".format(
            dataset, nb_orderings, results[dataset]['mean'], results[dataset]['stderror']))

        if results[dataset]['incomplete']:
            print("** Warning **: {} evaluation is incomplete."
                  " Missing some orderings or batches.".format(dataset))
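
# NOTE: the exact behaviour of `merge_NLL_evaluations` is defined elsewhere in the repo.
# The sketch below only illustrates the standard way per-ordering NADE losses are usually
# combined into an ensemble NLL: p(x) is averaged over the K orderings, so
# NLL(x) = -log((1/K) * sum_k exp(-NLL_k(x))), computed stably in log space. The variable
# names here are hypothetical.
import numpy as np


def merge_orderings_sketch(nlls_per_ordering):
    """`nlls_per_ordering` is a hypothetical (K, N) array: the NLL of each of the N examples
    under each of the K orderings. Returns the ensemble mean NLL and its standard error."""
    nlls_per_ordering = np.asarray(nlls_per_ordering, dtype=np.float64)
    K = nlls_per_ordering.shape[0]
    # -log((1/K) * sum_k p(x | o_k)), one value per example.
    ensemble_nlls = -(np.logaddexp.reduce(-nlls_per_ordering, axis=0) - np.log(K))
    return {"mean": float(ensemble_nlls.mean()),
            "stderror": float(ensemble_nlls.std(ddof=1) / np.sqrt(len(ensemble_nlls)))}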
def test_simple_convnade():
    nb_kernels = 8
    kernel_shape = (2, 2)
    hidden_activation = "sigmoid"
    consider_mask_as_channel = True
    batch_size = 1024
    ordering_seed = 1234
    max_epoch = 3
    nb_orderings = 1

    print("Will train Convolutional Deep NADE for a total of {0} epochs.".format(max_epoch))

    with Timer("Loading/processing binarized MNIST"):
        trainset, validset, testset = load_binarized_mnist()

        # Extract the center patch (4x4 pixels) of each image.
        indices_to_keep = [348, 349, 350, 351,
                           376, 377, 378, 379,
                           404, 405, 406, 407,
                           432, 433, 434, 435]

        trainset = Dataset(trainset.inputs.get_value()[:, indices_to_keep],
                           trainset.inputs.get_value()[:, indices_to_keep],
                           name="trainset")
        validset = Dataset(validset.inputs.get_value()[:, indices_to_keep],
                           validset.inputs.get_value()[:, indices_to_keep],
                           name="validset")
        testset = Dataset(testset.inputs.get_value()[:, indices_to_keep],
                          testset.inputs.get_value()[:, indices_to_keep],
                          name="testset")

        image_shape = (4, 4)
        nb_channels = 1

    with Timer("Building model"):
        builder = DeepConvNADEBuilder(image_shape=image_shape,
                                      nb_channels=nb_channels,
                                      consider_mask_as_channel=True)

        # Blueprint strings: "<nb_filters>@<kernel_rows>x<kernel_cols>(<border_mode>)" per conv
        # layer, hidden-layer sizes for the fully-connected net, chained with "->".
        convnet_blueprint = "64@2x2(valid) -> 1@2x2(full)"
        fullnet_blueprint = "5 -> 16"
        print("Convnet:", convnet_blueprint)
        print("Fullnet:", fullnet_blueprint)
        builder.build_convnet_from_blueprint(convnet_blueprint)
        builder.build_fullnet_from_blueprint(fullnet_blueprint)

        model = builder.build()
        model.initialize()  # By default, uniform initialization.

    with Timer("Building optimizer"):
        loss = BinaryCrossEntropyEstimateWithAutoRegressiveMask(model, trainset)

        optimizer = SGD(loss=loss)
        optimizer.append_direction_modifier(ConstantLearningRate(0.001))

    with Timer("Building trainer"):
        batch_scheduler = MiniBatchSchedulerWithAutoregressiveMask(trainset, batch_size)

        trainer = Trainer(optimizer, batch_scheduler)
        trainer.append_task(stopping_criteria.MaxEpochStopping(max_epoch))

        # Print time for one epoch
        trainer.append_task(tasks.PrintEpochDuration())
        trainer.append_task(tasks.PrintTrainingDuration())

        # Log training error
        loss_monitor = views.MonitorVariable(loss.loss)
        avg_loss = tasks.AveragePerEpoch(loss_monitor)
        accum = tasks.Accumulator(loss_monitor)
        logger = tasks.Logger(loss_monitor, avg_loss)
        trainer.append_task(logger, avg_loss, accum)

        # Print average training loss.
        trainer.append_task(tasks.Print("Avg. training loss: {}", avg_loss))

        # Print NLL mean/stderror.
        nll = views.LossView(
            loss=BinaryCrossEntropyEstimateWithAutoRegressiveMask(model, validset),
            batch_scheduler=MiniBatchSchedulerWithAutoregressiveMask(validset,
                                                                     batch_size=len(validset)))
        trainer.append_task(tasks.Print("Validset - NLL : {0:.2f} ± {1:.2f}",
                                        nll.mean, nll.stderror))

        trainer.build_theano_graph()

    with Timer("Training"):
        trainer.train()

    with Timer("Checking the probs for all possible inputs sum to 1"):
        # rng = np.random.RandomState(ordering_seed)
        D = np.prod(image_shape)

        batch_scheduler = BatchSchedulerWithAutoregressiveMasks(validset,
                                                                batch_size=len(validset),
                                                                batch_id=0,
                                                                ordering_id=0,
                                                                concatenate_mask=model.nb_channels == 2,
                                                                seed=42)
        nll = views.LossView(
            loss=NllUsingBinaryCrossEntropyWithAutoRegressiveMask(model, validset, batch_scheduler.mod),
            batch_scheduler=batch_scheduler)
        nlls_xod_given_xoltd = nll.losses.view(Status())
        nlls = np.sum(nlls_xod_given_xoltd.reshape(-1, len(validset)), axis=0)
        nll_validset = np.mean(nlls)
        print("Average NLL on validset:", nll_validset)

        inputs = cartesian([[0, 1]] * int(D), dtype=np.float32)
        dataset = ReconstructionDataset(inputs)
        batch_scheduler = BatchSchedulerWithAutoregressiveMasks(dataset,
                                                                batch_size=len(dataset),
                                                                batch_id=0,
                                                                ordering_id=0,
                                                                concatenate_mask=model.nb_channels == 2,
                                                                seed=42)
        nll = views.LossView(
            loss=NllUsingBinaryCrossEntropyWithAutoRegressiveMask(model, dataset, batch_scheduler.mod),
            batch_scheduler=batch_scheduler)
        nlls_xod_given_xoltd = nll.losses.view(Status())
        nlls = np.sum(nlls_xod_given_xoltd.reshape(-1, len(dataset)), axis=0)
        p_x = np.exp(np.logaddexp.reduce(-nlls))
        print("Sum of p(x) for all x:", p_x)
        assert_almost_equal(p_x, 1., decimal=5)
def main():
    parser = buildArgsParser()
    args = parser.parse_args()

    # Extract experiment hyperparameters.
    hyperparams = dict(vars(args))

    # Remove hyperparams that should not be part of the hash.
    del hyperparams['max_epoch']
    del hyperparams['keep']
    del hyperparams['force']
    del hyperparams['name']

    # Get/generate experiment name.
    experiment_name = args.name
    if experiment_name is None:
        experiment_name = utils.generate_uid_from_string(repr(hyperparams))

    # Create experiment folder.
    experiment_path = pjoin(".", "experiments", experiment_name)
    resuming = False
    if os.path.isdir(experiment_path) and not args.force:
        resuming = True
        print("### Resuming experiment ({0}). ###\n".format(experiment_name))

        # Check that the provided hyperparams match those in the experiment folder.
        hyperparams_loaded = utils.load_dict_from_json_file(pjoin(experiment_path, "hyperparams.json"))
        if hyperparams != hyperparams_loaded:
            print("{\n" + "\n".join(["{}: {}".format(k, hyperparams[k])
                                     for k in sorted(hyperparams.keys())]) + "\n}")
            print("{\n" + "\n".join(["{}: {}".format(k, hyperparams_loaded[k])
                                     for k in sorted(hyperparams_loaded.keys())]) + "\n}")
            print("The provided arguments differ from the saved ones."
                  " Use --force if you are certain.\nQuitting.")
            sys.exit(1)
    else:
        if os.path.isdir(experiment_path):
            shutil.rmtree(experiment_path)

        os.makedirs(experiment_path)
        utils.save_dict_to_json_file(pjoin(experiment_path, "hyperparams.json"), hyperparams)

    with Timer("Loading dataset"):
        trainset, validset, testset = datasets.load(args.dataset)

        image_shape = (28, 28)
        nb_channels = 1 + (args.use_mask_as_input is True)

        batch_scheduler = MiniBatchSchedulerWithAutoregressiveMask(trainset,
                                                                   args.batch_size,
                                                                   use_mask_as_input=args.use_mask_as_input,
                                                                   seed=args.ordering_seed)
        print("{} updates per epoch.".format(len(batch_scheduler)))

    with Timer("Building model"):
        if args.use_lasagne:
            if args.with_residual:
                model = DeepConvNadeWithResidualUsingLasagne(image_shape=image_shape,
                                                             nb_channels=nb_channels,
                                                             convnet_blueprint=args.convnet_blueprint,
                                                             fullnet_blueprint=args.fullnet_blueprint,
                                                             hidden_activation=args.hidden_activation,
                                                             use_mask_as_input=args.use_mask_as_input)
            else:
                model = DeepConvNadeUsingLasagne(image_shape=image_shape,
                                                 nb_channels=nb_channels,
                                                 convnet_blueprint=args.convnet_blueprint,
                                                 fullnet_blueprint=args.fullnet_blueprint,
                                                 hidden_activation=args.hidden_activation,
                                                 use_mask_as_input=args.use_mask_as_input,
                                                 use_batch_norm=args.batch_norm)
        elif args.with_residual:
            model = DeepConvNADEWithResidual(image_shape=image_shape,
                                             nb_channels=nb_channels,
                                             convnet_blueprint=args.convnet_blueprint,
                                             fullnet_blueprint=args.fullnet_blueprint,
                                             hidden_activation=args.hidden_activation,
                                             use_mask_as_input=args.use_mask_as_input)
        else:
            builder = DeepConvNADEBuilder(image_shape=image_shape,
                                          nb_channels=nb_channels,
                                          hidden_activation=args.hidden_activation,
                                          use_mask_as_input=args.use_mask_as_input)

            if args.blueprints_seed is not None:
                convnet_blueprint, fullnet_blueprint = generate_blueprints(args.blueprints_seed, image_shape[0])
                builder.build_convnet_from_blueprint(convnet_blueprint)
                builder.build_fullnet_from_blueprint(fullnet_blueprint)
            else:
                if args.convnet_blueprint is not None:
                    builder.build_convnet_from_blueprint(args.convnet_blueprint)

                if args.fullnet_blueprint is not None:
                    builder.build_fullnet_from_blueprint(args.fullnet_blueprint)

            model = builder.build()
            # print(str(model.convnet))
            # print(str(model.fullnet))

        model.initialize(weigths_initializer_factory(args.weights_initialization,
                                                     seed=args.initialization_seed))

    print(str(model))

    with Timer("Building optimizer"):
        loss = BinaryCrossEntropyEstimateWithAutoRegressiveMask(model, trainset)
        optimizer = optimizer_factory(hyperparams, loss)

    with Timer("Building trainer"):
        trainer = Trainer(optimizer, batch_scheduler)

        if args.max_epoch is not None:
            trainer.append_task(stopping_criteria.MaxEpochStopping(args.max_epoch))

        # Print time for one epoch
        trainer.append_task(tasks.PrintEpochDuration())
        trainer.append_task(tasks.PrintTrainingDuration())

        # Log training error
        loss_monitor = views.MonitorVariable(loss.loss)
        avg_loss = tasks.AveragePerEpoch(loss_monitor)
        accum = tasks.Accumulator(loss_monitor)
        logger = tasks.Logger(loss_monitor, avg_loss)
        trainer.append_task(logger, avg_loss, accum)

        # Print average training loss.
        trainer.append_task(tasks.Print("Avg. training loss: {}", avg_loss))

        # Print NLL mean/stderror.
        model.deterministic = True  # For batch normalization, see https://github.com/Lasagne/Lasagne/blob/master/lasagne/layers/normalization.py#L198
        nll = views.LossView(
            loss=BinaryCrossEntropyEstimateWithAutoRegressiveMask(model, validset),
            batch_scheduler=MiniBatchSchedulerWithAutoregressiveMask(validset,
                                                                     batch_size=0.1 * len(validset),
                                                                     use_mask_as_input=args.use_mask_as_input,
                                                                     keep_mask=True,
                                                                     seed=args.ordering_seed + 1))
        # trainer.append_task(tasks.Print("Validset - NLL : {0:.2f} ± {1:.2f}", nll.mean, nll.stderror, each_k_update=100))
        trainer.append_task(tasks.Print("Validset - NLL : {0:.2f} ± {1:.2f}", nll.mean, nll.stderror))

        # direction_norm = views.MonitorVariable(T.sqrt(sum(map(lambda d: T.sqr(d).sum(), loss.gradients.values()))))
        # trainer.append_task(tasks.Print("||d|| : {0:.4f}", direction_norm, each_k_update=50))

        # Save training progression.
        def save_model(*args):
            trainer.save(experiment_path)

        trainer.append_task(stopping_criteria.EarlyStopping(nll.mean,
                                                            lookahead=args.lookahead,
                                                            eps=args.lookahead_eps,
                                                            callback=save_model))

        trainer.build_theano_graph()

    if resuming:
        with Timer("Loading"):
            trainer.load(experiment_path)

    with Timer("Training"):
        trainer.train()

    trainer.save(experiment_path)
    model.save(experiment_path)
def test_new_fprop_matches_old_fprop():
    nb_kernels = 8
    kernel_shape = (2, 2)
    hidden_activation = "sigmoid"
    use_mask_as_input = True
    batch_size = 1024
    ordering_seed = 1234
    max_epoch = 10
    nb_orderings = 1

    print("Will train Convolutional Deep NADE for a total of {0} epochs.".format(max_epoch))

    with Timer("Loading/processing binarized MNIST"):
        trainset, validset, testset = load_binarized_mnist()

        # Extract the center patch (4x4 pixels) of each image.
        indices_to_keep = [348, 349, 350, 351,
                           376, 377, 378, 379,
                           404, 405, 406, 407,
                           432, 433, 434, 435]

        trainset = Dataset(trainset.inputs.get_value()[:, indices_to_keep],
                           trainset.inputs.get_value()[:, indices_to_keep],
                           name="trainset")
        validset = Dataset(validset.inputs.get_value()[:, indices_to_keep],
                           validset.inputs.get_value()[:, indices_to_keep],
                           name="validset")
        testset = Dataset(testset.inputs.get_value()[:, indices_to_keep],
                          testset.inputs.get_value()[:, indices_to_keep],
                          name="testset")

        image_shape = (4, 4)
        nb_channels = 1 + (use_mask_as_input is True)

    with Timer("Building model"):
        builder = DeepConvNADEBuilder(image_shape=image_shape,
                                      nb_channels=nb_channels,
                                      use_mask_as_input=use_mask_as_input)

        convnet_blueprint = "64@2x2(valid) -> 1@2x2(full)"
        fullnet_blueprint = "5 -> 16"
        print("Convnet:", convnet_blueprint)
        print("Fullnet:", fullnet_blueprint)
        builder.build_convnet_from_blueprint(convnet_blueprint)
        builder.build_fullnet_from_blueprint(fullnet_blueprint)

        model = builder.build()
        model.initialize()  # By default, uniform initialization.

    with Timer("Building optimizer"):
        loss = BinaryCrossEntropyEstimateWithAutoRegressiveMask(model, trainset)

        optimizer = SGD(loss=loss)
        optimizer.append_direction_modifier(ConstantLearningRate(0.001))

    with Timer("Building trainer"):
        batch_scheduler = MiniBatchSchedulerWithAutoregressiveMask(trainset, batch_size,
                                                                   use_mask_as_input=use_mask_as_input)

        trainer = Trainer(optimizer, batch_scheduler)

        # Print time for one epoch
        trainer.append_task(tasks.PrintEpochDuration())
        trainer.append_task(tasks.PrintTrainingDuration())

        # Log training error
        loss_monitor = views.MonitorVariable(loss.loss)
        avg_loss = tasks.AveragePerEpoch(loss_monitor)
        accum = tasks.Accumulator(loss_monitor)
        logger = tasks.Logger(loss_monitor, avg_loss)
        trainer.append_task(logger, avg_loss, accum)

        # Print average training loss.
        trainer.append_task(tasks.Print("Avg. training loss: {}", avg_loss))

        trainer.append_task(stopping_criteria.MaxEpochStopping(max_epoch))
        trainer.build_theano_graph()

    with Timer("Training"):
        trainer.train()

    mask_o_lt_d = batch_scheduler._shared_batch_mask

    # The new fprop and the old get_output interface should produce the same pre-activations.
    fprop_output, fprop_pre_output = model.fprop(trainset.inputs, mask_o_lt_d,
                                                 return_output_preactivation=True)
    model_output = model.get_output(T.concatenate([trainset.inputs * mask_o_lt_d, mask_o_lt_d], axis=1))
    assert_array_equal(model_output.eval(), fprop_pre_output.eval())
    print(np.sum(abs(model_output.eval() - fprop_pre_output.eval())))
def test_save_load_convnade():
    nb_kernels = 8
    kernel_shape = (2, 2)
    hidden_activation = "hinge"
    use_mask_as_input = True
    batch_size = 1024
    ordering_seed = 1234
    max_epoch = 5
    nb_orderings = 1

    with Timer("Loading/processing binarized MNIST"):
        trainset, validset, testset = load_binarized_mnist()

        # Extract the center patch (4x4 pixels) of each image.
        indices_to_keep = [348, 349, 350, 351,
                           376, 377, 378, 379,
                           404, 405, 406, 407,
                           432, 433, 434, 435]

        trainset = Dataset(trainset.inputs.get_value()[:, indices_to_keep],
                           trainset.inputs.get_value()[:, indices_to_keep],
                           name="trainset")
        validset = Dataset(validset.inputs.get_value()[:, indices_to_keep],
                           validset.inputs.get_value()[:, indices_to_keep],
                           name="validset")
        testset = Dataset(testset.inputs.get_value()[:, indices_to_keep],
                          testset.inputs.get_value()[:, indices_to_keep],
                          name="testset")

        image_shape = (4, 4)
        nb_channels = 1

    # Nested function to build a trainer.
    def _build_trainer(nb_epochs):
        print("Will train Convolutional Deep NADE for a total of {0} epochs.".format(nb_epochs))

        with Timer("Building model"):
            builder = DeepConvNADEBuilder(image_shape=image_shape,
                                          nb_channels=nb_channels,
                                          use_mask_as_input=use_mask_as_input)

            convnet_blueprint = "64@2x2(valid) -> 1@2x2(full)"
            fullnet_blueprint = "5 -> 16"
            print("Convnet:", convnet_blueprint)
            print("Fullnet:", fullnet_blueprint)
            builder.build_convnet_from_blueprint(convnet_blueprint)
            builder.build_fullnet_from_blueprint(fullnet_blueprint)

            model = builder.build()
            model.initialize(initer.UniformInitializer(random_seed=1234))

        with Timer("Building optimizer"):
            loss = BinaryCrossEntropyEstimateWithAutoRegressiveMask(model, trainset)

            optimizer = SGD(loss=loss)
            optimizer.append_direction_modifier(ConstantLearningRate(0.001))

        with Timer("Building trainer"):
            batch_scheduler = MiniBatchSchedulerWithAutoregressiveMask(trainset, batch_size)

            trainer = Trainer(optimizer, batch_scheduler)

            # Print time for one epoch
            trainer.append_task(tasks.PrintEpochDuration())
            trainer.append_task(tasks.PrintTrainingDuration())

            # Log training error
            loss_monitor = views.MonitorVariable(loss.loss)
            avg_loss = tasks.AveragePerEpoch(loss_monitor)
            accum = tasks.Accumulator(loss_monitor)
            logger = tasks.Logger(loss_monitor, avg_loss)
            trainer.append_task(logger, avg_loss, accum)

            # Print average training loss.
            trainer.append_task(tasks.Print("Avg. training loss: {}", avg_loss))

            # Print NLL mean/stderror.
            nll = views.LossView(
                loss=BinaryCrossEntropyEstimateWithAutoRegressiveMask(model, validset),
                batch_scheduler=MiniBatchSchedulerWithAutoregressiveMask(validset,
                                                                         batch_size=len(validset),
                                                                         keep_mask=True))
            trainer.append_task(tasks.Print("Validset - NLL : {0:.2f} ± {1:.2f}",
                                            nll.mean, nll.stderror))

            trainer.append_task(stopping_criteria.MaxEpochStopping(nb_epochs))

        return trainer, nll, logger

    # Train a reference model for 10 epochs in one go.
    trainer1, nll1, logger1 = _build_trainer(nb_epochs=10)
    with Timer("Compiling training graph"):
        trainer1.build_theano_graph()

    with Timer("Training"):
        trainer1.train()

    # Train an identically initialized model for only 5 epochs.
    trainer2a, nll2a, logger2a = _build_trainer(5)
    with Timer("Compiling training graph"):
        trainer2a.build_theano_graph()

    with Timer("Training"):
        trainer2a.train()

    # Save model halfway during training and resume it.
    with tempfile.TemporaryDirectory() as experiment_dir:
        with Timer("Saving"):
            # Save current state of the model (i.e. after 5 epochs).
            trainer2a.save(experiment_dir)

        with Timer("Loading"):
            # Load previous state from which training will resume.
            trainer2b, nll2b, logger2b = _build_trainer(10)
            trainer2b.load(experiment_dir)

        # Check we correctly reloaded the model.
        assert_equal(len(trainer2a._optimizer.loss.model.parameters),
                     len(trainer2b._optimizer.loss.model.parameters))
        for param1, param2 in zip(trainer2a._optimizer.loss.model.parameters,
                                  trainer2b._optimizer.loss.model.parameters):
            assert_array_equal(param1.get_value(), param2.get_value(), err_msg=param1.name)

        with Timer("Compiling training graph"):
            trainer2b.build_theano_graph()

        with Timer("Training"):
            trainer2b.train()

        # Check we correctly resumed training.
        assert_equal(len(trainer1._optimizer.loss.model.parameters),
                     len(trainer2b._optimizer.loss.model.parameters))
        for param1, param2 in zip(trainer1._optimizer.loss.model.parameters,
                                  trainer2b._optimizer.loss.model.parameters):
            # I tested it, they are exactly equal when using float64.
            assert_array_almost_equal(param1.get_value(), param2.get_value(), err_msg=param1.name)

        # I tested it, they are exactly equal when using float64.
        assert_array_almost_equal(nll1.mean.view(trainer1.status),
                                  nll2b.mean.view(trainer2b.status))
        assert_array_almost_equal(nll1.stderror.view(trainer1.status),
                                  nll2b.stderror.view(trainer2b.status))

        # I tested it, they are exactly equal when using float64.
        assert_array_almost_equal(logger1.get_variable_history(0),
                                  logger2a.get_variable_history(0) + logger2b.get_variable_history(0))
        assert_array_almost_equal(logger1.get_variable_history(1),
                                  logger2a.get_variable_history(1) + logger2b.get_variable_history(1))
def test_simple_convnade():
    nb_kernels = 8
    kernel_shape = (2, 2)
    hidden_activation = "sigmoid"
    use_mask_as_input = True
    batch_size = 1024
    ordering_seed = 1234
    max_epoch = 3
    nb_orderings = 1

    print("Will train Convolutional Deep NADE for a total of {0} epochs.".format(max_epoch))

    with Timer("Loading/processing binarized MNIST"):
        trainset, validset, testset = load_binarized_mnist()

        # Extract the center patch (4x4 pixels) of each image.
        indices_to_keep = [348, 349, 350, 351,
                           376, 377, 378, 379,
                           404, 405, 406, 407,
                           432, 433, 434, 435]

        trainset = Dataset(trainset.inputs.get_value()[:, indices_to_keep],
                           trainset.inputs.get_value()[:, indices_to_keep],
                           name="trainset")
        validset = Dataset(validset.inputs.get_value()[:, indices_to_keep],
                           validset.inputs.get_value()[:, indices_to_keep],
                           name="validset")
        testset = Dataset(testset.inputs.get_value()[:, indices_to_keep],
                          testset.inputs.get_value()[:, indices_to_keep],
                          name="testset")

        image_shape = (4, 4)
        nb_channels = 1

    with Timer("Building model"):
        builder = DeepConvNADEBuilder(image_shape=image_shape,
                                      nb_channels=nb_channels,
                                      use_mask_as_input=use_mask_as_input)

        convnet_blueprint = "64@2x2(valid) -> 1@2x2(full)"
        fullnet_blueprint = "5 -> 16"
        print("Convnet:", convnet_blueprint)
        print("Fullnet:", fullnet_blueprint)
        builder.build_convnet_from_blueprint(convnet_blueprint)
        builder.build_fullnet_from_blueprint(fullnet_blueprint)

        model = builder.build()
        model.initialize()  # By default, uniform initialization.

    with Timer("Building optimizer"):
        loss = BinaryCrossEntropyEstimateWithAutoRegressiveMask(model, trainset)

        optimizer = SGD(loss=loss)
        optimizer.append_direction_modifier(ConstantLearningRate(0.001))

    with Timer("Building trainer"):
        batch_scheduler = MiniBatchSchedulerWithAutoregressiveMask(trainset, batch_size)

        trainer = Trainer(optimizer, batch_scheduler)
        trainer.append_task(stopping_criteria.MaxEpochStopping(max_epoch))

        # Print time for one epoch
        trainer.append_task(tasks.PrintEpochDuration())
        trainer.append_task(tasks.PrintTrainingDuration())

        # Log training error
        loss_monitor = views.MonitorVariable(loss.loss)
        avg_loss = tasks.AveragePerEpoch(loss_monitor)
        accum = tasks.Accumulator(loss_monitor)
        logger = tasks.Logger(loss_monitor, avg_loss)
        trainer.append_task(logger, avg_loss, accum)

        # Print average training loss.
        trainer.append_task(tasks.Print("Avg. training loss: {}", avg_loss))

        # Print NLL mean/stderror.
        nll = views.LossView(
            loss=BinaryCrossEntropyEstimateWithAutoRegressiveMask(model, validset),
            batch_scheduler=MiniBatchSchedulerWithAutoregressiveMask(validset,
                                                                     batch_size=len(validset)))
        trainer.append_task(tasks.Print("Validset - NLL : {0:.2f} ± {1:.2f}",
                                        nll.mean, nll.stderror))

        trainer.build_theano_graph()

    with Timer("Training"):
        trainer.train()

    with Timer("Checking the probs for all possible inputs sum to 1"):
        rng = np.random.RandomState(ordering_seed)
        D = np.prod(image_shape)
        inputs = cartesian([[0, 1]] * int(D), dtype=np.float32)

        ordering = np.arange(D, dtype=np.int32)
        rng.shuffle(ordering)

        symb_input = T.vector("input")
        symb_input.tag.test_value = inputs[-len(inputs) // 4]
        symb_ordering = T.ivector("ordering")
        symb_ordering.tag.test_value = ordering
        nll_of_x_given_o = theano.function([symb_input, symb_ordering],
                                           model.nll_of_x_given_o(symb_input, symb_ordering),
                                           name="nll_of_x_given_o")
        # theano.printing.pydotprint(nll_of_x_given_o, '{0}_nll_of_x_given_o_{1}'.format(model.__class__.__name__, theano.config.device), with_ids=True)

        for i in range(nb_orderings):
            ordering = np.arange(D, dtype=np.int32)
            rng.shuffle(ordering)
            print("Ordering:", ordering)

            nlls = []
            for no, input in enumerate(inputs):
                print("{}/{}".format(no, len(inputs)), end='\r')
                nlls.append(nll_of_x_given_o(input, ordering))

            print("{}/{} Done".format(len(inputs), len(inputs)))

            p_x = np.exp(np.logaddexp.reduce(-np.array(nlls)))
            print("Sum of p(x) for all x:", p_x)
            assert_almost_equal(p_x, 1., decimal=5)
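
# NOTE: the check above relies on the identity sum_x p(x) = exp(logsumexp_x(-NLL(x))),
# evaluated with np.logaddexp.reduce for numerical stability. Below is a minimal,
# self-contained illustration of the same check on a toy factorized Bernoulli model;
# nothing here depends on the repo's code.
import numpy as np
from itertools import product


def _toy_check_probs_sum_to_one(D=4, seed=1234):
    rng = np.random.RandomState(seed)
    p = rng.rand(D)  # independent Bernoulli parameters for a toy model

    nlls = []
    for x in product([0.0, 1.0], repeat=D):  # enumerate all 2**D binary inputs
        x = np.array(x)
        nll = -np.sum(x * np.log(p) + (1 - x) * np.log(1 - p))
        nlls.append(nll)

    # Stable computation of sum_x p(x) = sum_x exp(-NLL(x)).
    p_x = np.exp(np.logaddexp.reduce(-np.array(nlls)))
    np.testing.assert_almost_equal(p_x, 1.0, decimal=5)
    return p_x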