def optimizer_factory(hyperparams, loss):
    """Instantiate the optimizer requested in `hyperparams` for `loss`.

    Optimizers are checked in a fixed order -- SGD, AdaGrad, Adam, RMSProp,
    Adadelta -- and the first one that is requested wins.

    Parameters
    ----------
    hyperparams : mapping
        Maps an optimizer name to its options. An entry that is None (or,
        for "Adadelta", falsy) means "not requested"; a missing key is
        treated the same way. Recognised entries:

        * "SGD": string whose first whitespace-separated token is the
          learning rate.
        * "AdaGrad": string "<lr> [<eps>]"; eps defaults to 1e-6.
        * "Adam": string "[<lr>]"; lr defaults to 0.0001.
        * "RMSProp": anything `float()` accepts, used as the learning rate.
        * "Adadelta": any truthy value.
    loss
        Forwarded unchanged as the `loss=` argument of the optimizer.

    Returns
    -------
    The constructed smartlearner optimizer.

    Raises
    ------
    ValueError
        If no optimizer is requested.
    """
    def _requested(name):
        # A partially filled mapping is valid input: an absent key simply
        # means that optimizer was not asked for.
        try:
            return hyperparams[name]
        except KeyError:
            return None

    # Set learning rate method that will be used.
    # smartlearner is imported lazily so that only the selected optimizer's
    # module is loaded.
    sgd_spec = _requested("SGD")
    if sgd_spec is not None:
        from smartlearner.optimizers import SGD
        from smartlearner.direction_modifiers import ConstantLearningRate
        options = sgd_spec.split()
        optimizer = SGD(loss=loss)
        optimizer.append_direction_modifier(
            ConstantLearningRate(lr=float(options[0])))
        return optimizer

    adagrad_spec = _requested("AdaGrad")
    if adagrad_spec is not None:
        from smartlearner.optimizers import AdaGrad
        options = adagrad_spec.split()
        lr = float(options[0])
        eps = float(options[1]) if len(options) > 1 else 1e-6
        return AdaGrad(loss=loss, lr=lr, eps=eps)

    adam_spec = _requested("Adam")
    if adam_spec is not None:
        from smartlearner.optimizers import Adam
        options = adam_spec.split()
        lr = float(options[0]) if len(options) > 0 else 0.0001
        return Adam(loss=loss, lr=lr)

    rmsprop_spec = _requested("RMSProp")
    if rmsprop_spec is not None:
        from smartlearner.optimizers import RMSProp
        lr = float(rmsprop_spec)
        return RMSProp(loss=loss, lr=lr)

    # Adadelta takes no options, so it is selected on truthiness.
    if _requested("Adadelta"):
        from smartlearner.optimizers import Adadelta
        return Adadelta(loss=loss)

    raise ValueError("The optimizer is mandatory!")
def _build_trainer(nb_epochs):
    """Assemble a model, an SGD optimizer and a trainer capped at `nb_epochs`.

    Reads `image_shape`, `nb_channels`, `use_mask_as_input`, `trainset`,
    `validset` and `batch_size` from the enclosing scope.

    Returns the tuple ``(trainer, nll, logger)``, where `nll` is the
    validation-set loss view and `logger` the training-loss logger task.
    """
    banner = "Will train Convoluational Deep NADE for a total of {0} epochs."
    print(banner.format(nb_epochs))

    with Timer("Building model"):
        nade_builder = DeepConvNADEBuilder(
            image_shape=image_shape,
            nb_channels=nb_channels,
            use_mask_as_input=use_mask_as_input,
        )
        convnet_blueprint = "64@2x2(valid) -> 1@2x2(full)"
        fullnet_blueprint = "5 -> 16"
        print("Convnet:", convnet_blueprint)
        print("Fullnet:", fullnet_blueprint)
        nade_builder.build_convnet_from_blueprint(convnet_blueprint)
        nade_builder.build_fullnet_from_blueprint(fullnet_blueprint)

        model = nade_builder.build()
        # Fixed seed for the weight initializer.
        weights_init = initer.UniformInitializer(random_seed=1234)
        model.initialize(weights_init)

    with Timer("Building optimizer"):
        train_loss = BinaryCrossEntropyEstimateWithAutoRegressiveMask(
            model, trainset)
        sgd = SGD(loss=train_loss)
        sgd.append_direction_modifier(ConstantLearningRate(0.001))

    with Timer("Building trainer"):
        train_batches = MiniBatchSchedulerWithAutoregressiveMask(
            trainset, batch_size)
        trainer = Trainer(sgd, train_batches)

        # Timing reports: one per epoch, one for the whole run.
        trainer.append_task(tasks.PrintEpochDuration())
        trainer.append_task(tasks.PrintTrainingDuration())

        # Training-loss bookkeeping.
        loss_monitor = views.MonitorVariable(train_loss.loss)
        avg_loss = tasks.AveragePerEpoch(loss_monitor)
        accum = tasks.Accumulator(loss_monitor)
        logger = tasks.Logger(loss_monitor, avg_loss)
        trainer.append_task(logger, avg_loss, accum)

        # Report the average training loss.
        avg_loss_printer = tasks.Print("Avg. training loss: : {}", avg_loss)
        trainer.append_task(avg_loss_printer)

        # Report mean/stderror of the loss on the validation set, which is
        # scheduled as a single batch.
        valid_loss = BinaryCrossEntropyEstimateWithAutoRegressiveMask(
            model, validset)
        valid_batches = MiniBatchSchedulerWithAutoregressiveMask(
            validset, batch_size=len(validset), keep_mask=True)
        nll = views.LossView(loss=valid_loss, batch_scheduler=valid_batches)
        nll_printer = tasks.Print(
            "Validset - NLL : {0:.2f} ± {1:.2f}", nll.mean, nll.stderror)
        trainer.append_task(nll_printer)

        # The stopping criterion is registered after every other task.
        trainer.append_task(stopping_criteria.MaxEpochStopping(nb_epochs))

    return trainer, nll, logger
def _build_experiment(self):
    """Build a small SGD experiment with a constant learning rate `self.lr`.

    Returns the tuple ``(trainer, logger, direction_modifier)``; `logger`
    records the first parameter exposed by the learning-rate modifier.
    """
    # Create an Nd gaussian function to optimize. This function is not
    # well-conditioned and there exists no perfect gradient step to converge
    # in only one iteration.
    dim = 4
    center = 5 * np.ones((1, dim)).astype(floatX)
    param = sharedX(np.zeros((1, dim)))

    # cost = sum(0.5 * (param - center) . diag(1/1, ..., 1/dim) . (param - center)^T)
    offset = param - center
    inv_scales = np.diag(1. / np.arange(1, dim + 1))
    quadratic_form = T.dot(T.dot(offset, inv_scales), offset.T)
    cost = T.sum(0.5 * quadratic_form)
    loss = DummyLossWithGradient(cost, param)

    direction_modifier = ConstantLearningRate(lr=self.lr)
    optimizer = SGD(loss)
    optimizer.append_direction_modifier(direction_modifier)
    trainer = Trainer(optimizer, DummyBatchScheduler())

    # Monitor the learning rate.
    modifier_params = list(direction_modifier.parameters.values())
    lr_view = views.MonitorVariable(modifier_params[0])
    logger = tasks.Logger(lr_view)
    trainer.append_task(logger)

    return trainer, logger, direction_modifier
def test_simple_perceptron():
    """Train a 10-class perceptron on MNIST for 10 epochs with plain SGD.

    The test has no explicit assertion; it passes if the whole pipeline
    (model, losses, tasks, training loop) runs without raising.
    """
    # Data.
    trainset, validset, testset = load_mnist()

    # Model.
    nb_classes = 10
    model = Perceptron(trainset.input_size, nb_classes)
    model.initialize()  # By default, uniform initialization.

    # Optimizer: SGD with a constant learning rate.
    train_loss = NLL(model, trainset)
    sgd = SGD(loss=train_loss)
    sgd.append_direction_modifier(ConstantLearningRate(0.1))

    # Mini-batches of 100 examples.
    train_batches = MiniBatchScheduler(trainset, 100)

    # Trainer and its tasks.
    trainer = Trainer(sgd, train_batches)

    # Timing reports: one per epoch, one for the whole run.
    trainer.append_task(tasks.PrintEpochDuration())
    trainer.append_task(tasks.PrintTrainingDuration())

    # Training-loss bookkeeping.
    loss_monitor = views.MonitorVariable(train_loss.loss)
    avg_loss = tasks.AveragePerEpoch(loss_monitor)
    accum = tasks.Accumulator(loss_monitor)
    logger = tasks.Logger(loss_monitor, avg_loss)
    trainer.append_task(logger, avg_loss, accum)

    # Mean/stderror of the NLL on the validation set.
    valid_nll = NLL(model, validset)
    nll = views.LossView(loss=valid_nll,
                         batch_scheduler=FullBatchScheduler(validset))
    nll_printer = tasks.Print(
        "Validset - NLL : {0:.1%} ± {1:.1%}", nll.mean, nll.stderror)
    trainer.append_task(nll_printer)

    # Mean/stderror of the classification error on the validation set.
    valid_error = ClassificationError(model, validset)
    classif_error = views.LossView(
        loss=valid_error, batch_scheduler=FullBatchScheduler(validset))
    error_printer = tasks.Print(
        "Validset - Classif error: {0:.1%} ± {1:.1%}",
        classif_error.mean, classif_error.stderror)
    trainer.append_task(error_printer)

    # Train for 10 epochs (stopping criteria should be added at the end).
    trainer.append_task(stopping_criteria.MaxEpochStopping(10))
    trainer.train()
def test_simple_convnade():
    """Train a small ConvNADE on 4x4 MNIST patches and check it normalizes.

    After a few epochs of SGD, the model's probabilities are summed over
    every binary 4x4 input; the test asserts the total is 1 to 5 decimals.
    """
    # Experiment settings. Only `batch_size` and `max_epoch` are read below;
    # the others are kept as they were.
    nb_kernels = 8
    kernel_shape = (2, 2)
    hidden_activation = "sigmoid"
    consider_mask_as_channel = True
    batch_size = 1024
    ordering_seed = 1234
    max_epoch = 3
    nb_orderings = 1

    banner = "Will train Convoluational Deep NADE for a total of {0} epochs."
    print(banner.format(max_epoch))

    with Timer("Loading/processing binarized MNIST"):
        trainset, validset, testset = load_binarized_mnist()

        # Extract the center patch (4x4 pixels) of each image: rows and
        # columns 12..15 of a flattened image with a row stride of 28,
        # i.e. indices 348..351, 376..379, 404..407, 432..435.
        indices_to_keep = [row * 28 + col
                           for row in range(12, 16)
                           for col in range(12, 16)]

        def center_patch(full_set, name):
            # The cropped inputs serve as both inputs and targets; the value
            # is fetched once for each role.
            return Dataset(full_set.inputs.get_value()[:, indices_to_keep],
                           full_set.inputs.get_value()[:, indices_to_keep],
                           name=name)

        trainset = center_patch(trainset, "trainset")
        validset = center_patch(validset, "validset")
        testset = center_patch(testset, "testset")

        image_shape = (4, 4)
        nb_channels = 1

    with Timer("Building model"):
        nade_builder = DeepConvNADEBuilder(image_shape=image_shape,
                                           nb_channels=nb_channels,
                                           consider_mask_as_channel=True)
        convnet_blueprint = "64@2x2(valid) -> 1@2x2(full)"
        fullnet_blueprint = "5 -> 16"
        print("Convnet:", convnet_blueprint)
        print("Fullnet:", fullnet_blueprint)
        nade_builder.build_convnet_from_blueprint(convnet_blueprint)
        nade_builder.build_fullnet_from_blueprint(fullnet_blueprint)

        model = nade_builder.build()
        model.initialize()  # By default, uniform initialization.

    with Timer("Building optimizer"):
        train_loss = BinaryCrossEntropyEstimateWithAutoRegressiveMask(
            model, trainset)
        sgd = SGD(loss=train_loss)
        sgd.append_direction_modifier(ConstantLearningRate(0.001))

    with Timer("Building trainer"):
        train_batches = MiniBatchSchedulerWithAutoregressiveMask(
            trainset, batch_size)
        trainer = Trainer(sgd, train_batches)

        # NOTE(review): the stopping criterion is registered before the
        # other tasks -- confirm that task order does not make the final
        # epoch skip the logging/printing tasks added below.
        trainer.append_task(stopping_criteria.MaxEpochStopping(max_epoch))

        # Timing reports: one per epoch, one for the whole run.
        trainer.append_task(tasks.PrintEpochDuration())
        trainer.append_task(tasks.PrintTrainingDuration())

        # Training-loss bookkeeping.
        loss_monitor = views.MonitorVariable(train_loss.loss)
        avg_loss = tasks.AveragePerEpoch(loss_monitor)
        accum = tasks.Accumulator(loss_monitor)
        logger = tasks.Logger(loss_monitor, avg_loss)
        trainer.append_task(logger, avg_loss, accum)

        # Report the average training loss.
        avg_loss_printer = tasks.Print("Avg. training loss: : {}", avg_loss)
        trainer.append_task(avg_loss_printer)

        # Report mean/stderror of the loss on the validation set, which is
        # scheduled as a single batch.
        valid_loss = BinaryCrossEntropyEstimateWithAutoRegressiveMask(
            model, validset)
        valid_batches = MiniBatchSchedulerWithAutoregressiveMask(
            validset, batch_size=len(validset))
        nll = views.LossView(loss=valid_loss, batch_scheduler=valid_batches)
        nll_printer = tasks.Print(
            "Validset - NLL : {0:.2f} ± {1:.2f}", nll.mean, nll.stderror)
        trainer.append_task(nll_printer)

        trainer.build_theano_graph()

    with Timer("Training"):
        trainer.train()

    with Timer("Checking the probs for all possible inputs sum to 1"):
        D = np.prod(image_shape)

        def summed_nlls(data):
            # View the losses of `data` scheduled as one batch, then reshape
            # them to (-1, len(data)) and sum over the first axis so that
            # one value per example remains.
            scheduler = BatchSchedulerWithAutoregressiveMasks(
                data,
                batch_size=len(data),
                batch_id=0,
                ordering_id=0,
                concatenate_mask=model.nb_channels == 2,
                seed=42)
            exact_loss = NllUsingBinaryCrossEntropyWithAutoRegressiveMask(
                model, data, scheduler.mod)
            view = views.LossView(loss=exact_loss, batch_scheduler=scheduler)
            nlls_xod_given_xoltd = view.losses.view(Status())
            return np.sum(
                nlls_xod_given_xoltd.reshape(-1, len(data)), axis=0)

        nll_validset = np.mean(summed_nlls(validset))
        print("Sum of NLL for validset:", nll_validset)

        # Enumerate every binary image of D pixels.
        inputs = cartesian([[0, 1]] * int(D), dtype=np.float32)
        dataset = ReconstructionDataset(inputs)
        nlls = summed_nlls(dataset)

        # log-sum-exp of the log-probabilities, then back to probability.
        p_x = np.exp(np.logaddexp.reduce(-nlls))
        print("Sum of p(x) for all x:", p_x)
        assert_almost_equal(p_x, 1., decimal=5)
def test_new_fprop_matches_old_fprop():
    """Check `model.fprop` against `model.get_output` after a short training.

    A small ConvNADE is trained on 4x4 MNIST patches; the output
    pre-activation returned by `fprop(inputs, mask)` must equal
    `get_output` applied to the masked inputs concatenated with the mask.
    """
    # Experiment settings. Only `use_mask_as_input`, `batch_size` and
    # `max_epoch` are read below; the others are kept as they were.
    nb_kernels = 8
    kernel_shape = (2, 2)
    hidden_activation = "sigmoid"
    use_mask_as_input = True
    batch_size = 1024
    ordering_seed = 1234
    max_epoch = 10
    nb_orderings = 1

    banner = "Will train Convoluational Deep NADE for a total of {0} epochs."
    print(banner.format(max_epoch))

    with Timer("Loading/processing binarized MNIST"):
        trainset, validset, testset = load_binarized_mnist()

        # Extract the center patch (4x4 pixels) of each image.
        indices_to_keep = [
            348, 349, 350, 351,
            376, 377, 378, 379,
            404, 405, 406, 407,
            432, 433, 434, 435,
        ]

        def center_patch(full_set, name):
            # The cropped inputs serve as both inputs and targets; the value
            # is fetched once for each role.
            return Dataset(full_set.inputs.get_value()[:, indices_to_keep],
                           full_set.inputs.get_value()[:, indices_to_keep],
                           name=name)

        trainset = center_patch(trainset, "trainset")
        validset = center_patch(validset, "validset")
        testset = center_patch(testset, "testset")

        image_shape = (4, 4)
        # One extra channel when the mask is fed as input.
        nb_channels = 2 if use_mask_as_input is True else 1

    with Timer("Building model"):
        nade_builder = DeepConvNADEBuilder(
            image_shape=image_shape,
            nb_channels=nb_channels,
            use_mask_as_input=use_mask_as_input)
        convnet_blueprint = "64@2x2(valid) -> 1@2x2(full)"
        fullnet_blueprint = "5 -> 16"
        print("Convnet:", convnet_blueprint)
        print("Fullnet:", fullnet_blueprint)
        nade_builder.build_convnet_from_blueprint(convnet_blueprint)
        nade_builder.build_fullnet_from_blueprint(fullnet_blueprint)

        model = nade_builder.build()
        model.initialize()  # By default, uniform initialization.

    with Timer("Building optimizer"):
        train_loss = BinaryCrossEntropyEstimateWithAutoRegressiveMask(
            model, trainset)
        sgd = SGD(loss=train_loss)
        sgd.append_direction_modifier(ConstantLearningRate(0.001))

    with Timer("Building trainer"):
        batch_scheduler = MiniBatchSchedulerWithAutoregressiveMask(
            trainset, batch_size, use_mask_as_input=use_mask_as_input)
        trainer = Trainer(sgd, batch_scheduler)

        # Timing reports: one per epoch, one for the whole run.
        trainer.append_task(tasks.PrintEpochDuration())
        trainer.append_task(tasks.PrintTrainingDuration())

        # Training-loss bookkeeping.
        loss_monitor = views.MonitorVariable(train_loss.loss)
        avg_loss = tasks.AveragePerEpoch(loss_monitor)
        accum = tasks.Accumulator(loss_monitor)
        logger = tasks.Logger(loss_monitor, avg_loss)
        trainer.append_task(logger, avg_loss, accum)

        # Report the average training loss.
        avg_loss_printer = tasks.Print("Avg. training loss: : {}", avg_loss)
        trainer.append_task(avg_loss_printer)

        trainer.append_task(stopping_criteria.MaxEpochStopping(max_epoch))

        trainer.build_theano_graph()

    with Timer("Training"):
        trainer.train()

    # Reuse the mask currently held by the training batch scheduler.
    mask_o_lt_d = batch_scheduler._shared_batch_mask

    fprop_output, fprop_pre_output = model.fprop(
        trainset.inputs, mask_o_lt_d, return_output_preactivation=True)

    masked_inputs = trainset.inputs * mask_o_lt_d
    stacked_inputs = T.concatenate([masked_inputs, mask_o_lt_d], axis=1)
    model_output = model.get_output(stacked_inputs)

    assert_array_equal(model_output.eval(), fprop_pre_output.eval())
    # Total absolute difference; only reached when the assertion holds.
    print(np.sum(abs(model_output.eval() - fprop_pre_output.eval())))
def test_simple_convnade():
    """Train a small ConvNADE on 4x4 MNIST patches and check it normalizes.

    After a few epochs of SGD, `model.nll_of_x_given_o` is evaluated on
    every binary 4x4 input for `nb_orderings` random pixel orderings. For
    each ordering the test asserts that the probabilities sum to 1 to 5
    decimals.
    """
    use_mask_as_input = True
    batch_size = 1024
    ordering_seed = 1234
    max_epoch = 3
    nb_orderings = 1

    print("Will train Convoluational Deep NADE for a total of {0} epochs.".
          format(max_epoch))

    with Timer("Loading/processing binarized MNIST"):
        trainset, validset, testset = load_binarized_mnist()

        # Extract the center patch (4x4 pixels) of each image.
        indices_to_keep = [
            348, 349, 350, 351,
            376, 377, 378, 379,
            404, 405, 406, 407,
            432, 433, 434, 435,
        ]

        # The cropped inputs are used both as inputs and as targets.
        trainset = Dataset(trainset.inputs.get_value()[:, indices_to_keep],
                           trainset.inputs.get_value()[:, indices_to_keep],
                           name="trainset")
        validset = Dataset(validset.inputs.get_value()[:, indices_to_keep],
                           validset.inputs.get_value()[:, indices_to_keep],
                           name="validset")
        testset = Dataset(testset.inputs.get_value()[:, indices_to_keep],
                          testset.inputs.get_value()[:, indices_to_keep],
                          name="testset")

        image_shape = (4, 4)
        nb_channels = 1

    with Timer("Building model"):
        builder = DeepConvNADEBuilder(image_shape=image_shape,
                                      nb_channels=nb_channels,
                                      use_mask_as_input=use_mask_as_input)

        convnet_blueprint = "64@2x2(valid) -> 1@2x2(full)"
        fullnet_blueprint = "5 -> 16"
        print("Convnet:", convnet_blueprint)
        print("Fullnet:", fullnet_blueprint)
        builder.build_convnet_from_blueprint(convnet_blueprint)
        builder.build_fullnet_from_blueprint(fullnet_blueprint)

        model = builder.build()
        model.initialize()  # By default, uniform initialization.

    with Timer("Building optimizer"):
        loss = BinaryCrossEntropyEstimateWithAutoRegressiveMask(
            model, trainset)

        optimizer = SGD(loss=loss)
        optimizer.append_direction_modifier(ConstantLearningRate(0.001))

    with Timer("Building trainer"):
        batch_scheduler = MiniBatchSchedulerWithAutoregressiveMask(
            trainset, batch_size)

        trainer = Trainer(optimizer, batch_scheduler)

        # NOTE(review): the stopping criterion is registered before the
        # other tasks -- confirm that task order does not make the final
        # epoch skip the logging/printing tasks added below.
        trainer.append_task(stopping_criteria.MaxEpochStopping(max_epoch))

        # Print time for one epoch
        trainer.append_task(tasks.PrintEpochDuration())
        trainer.append_task(tasks.PrintTrainingDuration())

        # Log training error
        loss_monitor = views.MonitorVariable(loss.loss)
        avg_loss = tasks.AveragePerEpoch(loss_monitor)
        accum = tasks.Accumulator(loss_monitor)
        logger = tasks.Logger(loss_monitor, avg_loss)
        trainer.append_task(logger, avg_loss, accum)

        # Print average training loss.
        trainer.append_task(
            tasks.Print("Avg. training loss: : {}", avg_loss))

        # Print NLL mean/stderror.
        nll = views.LossView(
            loss=BinaryCrossEntropyEstimateWithAutoRegressiveMask(
                model, validset),
            batch_scheduler=MiniBatchSchedulerWithAutoregressiveMask(
                validset, batch_size=len(validset)))
        trainer.append_task(
            tasks.Print("Validset - NLL : {0:.2f} ± {1:.2f}",
                        nll.mean, nll.stderror))

        trainer.build_theano_graph()

    with Timer("Training"):
        trainer.train()

    with Timer("Checking the probs for all possible inputs sum to 1"):
        rng = np.random.RandomState(ordering_seed)
        D = np.prod(image_shape)

        # Enumerate every binary image of D pixels.
        inputs = cartesian([[0, 1]] * int(D), dtype=np.float32)
        nb_inputs = len(inputs)

        # This first draw only supplies a test value for the symbolic
        # ordering; the orderings actually evaluated are drawn in the loop.
        ordering = np.arange(D, dtype=np.int32)
        rng.shuffle(ordering)

        symb_input = T.vector("input")
        symb_input.tag.test_value = inputs[-len(inputs) // 4]
        symb_ordering = T.ivector("ordering")
        symb_ordering.tag.test_value = ordering
        nll_of_x_given_o = theano.function(
            [symb_input, symb_ordering],
            model.nll_of_x_given_o(symb_input, symb_ordering),
            name="nll_of_x_given_o")
        #theano.printing.pydotprint(nll_of_x_given_o, '{0}_nll_of_x_given_o_{1}'.format(model.__class__.__name__, theano.config.device), with_ids=True)

        for _ in range(nb_orderings):
            ordering = np.arange(D, dtype=np.int32)
            rng.shuffle(ordering)
            # Log the ordering only once it has been drawn, so the printed
            # permutation is the one the NLLs below are computed with.
            print("Ordering:", ordering)

            nlls = []
            for no, x in enumerate(inputs):
                print("{}/{}".format(no, nb_inputs), end='\r')
                nlls.append(nll_of_x_given_o(x, ordering))

            print("{}/{} Done".format(nb_inputs, nb_inputs))

            # log-sum-exp of the log-probabilities, then back to probability.
            p_x = np.exp(np.logaddexp.reduce(-np.array(nlls)))
            print("Sum of p(x) for all x:", p_x)
            assert_almost_equal(p_x, 1., decimal=5)