def _build_trainer(nb_epochs):
    print("Will train Convolutional Deep NADE for a total of {0} epochs.".format(nb_epochs))

    with Timer("Building model"):
        builder = DeepConvNADEBuilder(image_shape=image_shape,
                                      nb_channels=nb_channels,
                                      use_mask_as_input=use_mask_as_input)

        convnet_blueprint = "64@2x2(valid) -> 1@2x2(full)"
        fullnet_blueprint = "5 -> 16"
        print("Convnet:", convnet_blueprint)
        print("Fullnet:", fullnet_blueprint)
        builder.build_convnet_from_blueprint(convnet_blueprint)
        builder.build_fullnet_from_blueprint(fullnet_blueprint)

        model = builder.build()
        model.initialize(initer.UniformInitializer(random_seed=1234))

    with Timer("Building optimizer"):
        loss = BinaryCrossEntropyEstimateWithAutoRegressiveMask(model, trainset)
        optimizer = SGD(loss=loss)
        optimizer.append_direction_modifier(ConstantLearningRate(0.001))

    with Timer("Building trainer"):
        batch_scheduler = MiniBatchSchedulerWithAutoregressiveMask(trainset, batch_size)
        trainer = Trainer(optimizer, batch_scheduler)

        # Print the duration of each epoch and of the whole training.
        trainer.append_task(tasks.PrintEpochDuration())
        trainer.append_task(tasks.PrintTrainingDuration())

        # Log the training loss.
        loss_monitor = views.MonitorVariable(loss.loss)
        avg_loss = tasks.AveragePerEpoch(loss_monitor)
        accum = tasks.Accumulator(loss_monitor)
        logger = tasks.Logger(loss_monitor, avg_loss)
        trainer.append_task(logger, avg_loss, accum)

        # Print the average training loss.
        trainer.append_task(tasks.Print("Avg. training loss: {}", avg_loss))

        # Print the validset NLL mean and its standard error.
        nll = views.LossView(
            loss=BinaryCrossEntropyEstimateWithAutoRegressiveMask(model, validset),
            batch_scheduler=MiniBatchSchedulerWithAutoregressiveMask(
                validset, batch_size=len(validset), keep_mask=True))
        trainer.append_task(tasks.Print("Validset - NLL: {0:.2f} ± {1:.2f}",
                                        nll.mean, nll.stderror))

        trainer.append_task(stopping_criteria.MaxEpochStopping(nb_epochs))

    return trainer, nll, logger
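# Usage sketch for the builder above (an assumption, not shown in this section:
# `trainset`, `validset`, `image_shape`, `nb_channels`, `use_mask_as_input` and
# `batch_size` must already be in scope, and `Trainer.train` is assumed to run
# until a stopping criterion, here MaxEpochStopping, fires):
#
#     trainer, nll, logger = _build_trainer(nb_epochs=10)
#     trainer.train()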
def test_fprop_faster(self):
    activation = "tanh"
    seed = 1234
    repeat = 1000

    layer = LayerLSTM(input_size=DATA['features_size'],
                      hidden_size=DATA['hidden_size'],
                      activation=activation)
    layer.initialize(initer.UniformInitializer(seed))

    layer_fast = LayerLSTMFast(input_size=DATA['features_size'],
                               hidden_size=DATA['hidden_size'],
                               activation=activation)

    # Make sure the weights are the same: the fast layer packs the four gate
    # matrices (Wi, Wo, Wf, Wm) column-wise into a single W, and likewise
    # (Ui, Uo, Uf, Um) into a single U.
    layer_fast.W.set_value(np.concatenate([layer.Wi.get_value(),
                                           layer.Wo.get_value(),
                                           layer.Wf.get_value(),
                                           layer.Wm.get_value()], axis=1))
    layer_fast.U.set_value(np.concatenate([layer.Ui.get_value(),
                                           layer.Uo.get_value(),
                                           layer.Uf.get_value(),
                                           layer.Um.get_value()], axis=1))

    input = T.matrix('input')
    input.tag.test_value = DATA['batch_one_step']
    last_h = sharedX(DATA['state_h'])
    last_m = sharedX(DATA['state_m'])
    fprop = theano.function([input], layer.fprop(input, last_h, last_m))
    fprop_faster = theano.function([input], layer_fast.fprop(input, last_h, last_m))

    fprop_time = measure("h, m = fprop(DATA['batch_one_step'])", repeat)
    fprop_faster_time = measure("h, m = fprop_faster(DATA['batch_one_step'])", repeat)
    print("fprop time: {:.2f} sec.".format(fprop_time))
    print("fprop faster time: {:.2f} sec.".format(fprop_faster_time))
    print("Speedup: {:.2f}x".format(fprop_time / fprop_faster_time))

    # Both implementations must produce identical outputs at every time step.
    for i in range(DATA['seq_len']):
        h1, m1 = fprop(DATA['batch'][:, i, :])
        h2, m2 = fprop_faster(DATA['batch'][:, i, :])
        assert_array_equal(h1, h2)
        assert_array_equal(m1, m2)
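# The timing tests above rely on a `measure` helper whose definition is not
# shown in this section. A minimal timeit-based sketch consistent with how it
# is called (statement string plus repeat count) could look like this; the
# `namespace` parameter is an assumption, needed so `stmt` can see the
# caller's local names such as `fprop`:
import timeit

def measure(stmt, repeat, namespace=None):
    """Return the total time in seconds to run `stmt` `repeat` times.

    `namespace` supplies the globals for `stmt`; it defaults to this module's
    globals, so callers timing local names should pass their own mapping.
    """
    return timeit.timeit(stmt, number=repeat, globals=namespace or globals())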
def weights_initializer_factory(name, seed=1234):
    if name == "uniform":
        return initer.UniformInitializer(seed)
    elif name == "zeros":
        return initer.ZerosInitializer(seed)
    elif name == "diagonal":
        return initer.DiagonalInitializer(seed)
    elif name == "orthogonal":
        return OrthogonalInitializer(seed)
    elif name == "gaussian":
        return initer.GaussienInitializer(seed)  # spelling follows the library's class name

    raise NotImplementedError("Unknown initializer: " + str(name))
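# Usage sketch: the factory maps a config string to an initializer instance,
# which can then be handed to any model's `initialize` method (see the
# `initialize` implementations below). `model` here stands in for whatever
# model is being built:
#
#     initializer = weights_initializer_factory("orthogonal", seed=42)
#     model.initialize(initializer)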
def test_fprop_mask_vs_not_mask(self):
    seed = 1234

    lstm = LSTM(input_size=DATA['features_size'],
                hidden_sizes=[DATA['hidden_size']])
    lstm.initialize(initer.UniformInitializer(seed))

    lstm2 = LSTMFast(input_size=DATA['features_size'],
                     hidden_sizes=[DATA['hidden_size']])
    lstm2.mask = sharedX(DATA['mask'])

    # Make sure the weights are the same: pack the gate matrices
    # (Wi, Wo, Wf, Wm) column-wise into the fused W, and (Ui, Uo, Uf, Um)
    # into the fused U.
    lstm2.layers_lstm[0].W.set_value(
        np.concatenate([lstm.layers_lstm[0].Wi.get_value(),
                        lstm.layers_lstm[0].Wo.get_value(),
                        lstm.layers_lstm[0].Wf.get_value(),
                        lstm.layers_lstm[0].Wm.get_value()], axis=1))
    lstm2.layers_lstm[0].U.set_value(
        np.concatenate([lstm.layers_lstm[0].Ui.get_value(),
                        lstm.layers_lstm[0].Uo.get_value(),
                        lstm.layers_lstm[0].Uf.get_value(),
                        lstm.layers_lstm[0].Um.get_value()], axis=1))

    input = T.tensor3('input')
    input.tag.test_value = DATA['batch']
    fprop = theano.function([input], lstm.get_output(input))
    fprop2 = theano.function([input], lstm2.get_output(input))

    out = fprop(DATA['batch'])
    out2 = fprop2(DATA['batch'])

    # The raw outputs differ (the masked version zeroes padded steps), but
    # they must agree wherever the mask is on.
    assert_true(out.sum() != out2.sum())
    assert_array_equal(out * DATA['mask'][:, :, None],
                       out2 * DATA['mask'][:, :, None])
def test_fprop_faster(self):
    seed = 1234
    repeat = 100

    lstm = LSTM(input_size=DATA['features_size'],
                hidden_sizes=[DATA['hidden_size']])
    lstm.initialize(initer.UniformInitializer(seed))

    lstm2 = LSTMFaster(input_size=DATA['features_size'],
                       hidden_sizes=[DATA['hidden_size']])

    # Make sure the weights are the same: pack the gate matrices
    # (Wi, Wo, Wf, Wm) column-wise into the fused W, and (Ui, Uo, Uf, Um)
    # into the fused U.
    lstm2.layers_lstm[0].W.set_value(
        np.concatenate([lstm.layers_lstm[0].Wi.get_value(),
                        lstm.layers_lstm[0].Wo.get_value(),
                        lstm.layers_lstm[0].Wf.get_value(),
                        lstm.layers_lstm[0].Wm.get_value()], axis=1))
    lstm2.layers_lstm[0].U.set_value(
        np.concatenate([lstm.layers_lstm[0].Ui.get_value(),
                        lstm.layers_lstm[0].Uo.get_value(),
                        lstm.layers_lstm[0].Uf.get_value(),
                        lstm.layers_lstm[0].Um.get_value()], axis=1))

    input = T.tensor3('input')
    input.tag.test_value = DATA['batch']
    fprop = theano.function([input], lstm.get_output(input))
    fprop2 = theano.function([input], lstm2.get_output(input))

    fprop_time = measure("out = fprop(DATA['batch'])", repeat)
    print("fprop time: {:.2f} sec.".format(fprop_time))
    fprop2_time = measure("out = fprop2(DATA['batch'])", repeat)
    print("fprop faster time: {:.2f} sec.".format(fprop2_time))
    print("Speedup: {:.2f}x".format(fprop_time / fprop2_time))

    out = fprop(DATA['batch'])
    out2 = fprop2(DATA['batch'])
    assert_array_equal(out, out2)
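# Why concatenating along axis=1 works: the "fast" variants pack the four gate
# matrices column-wise into one W of shape (input_size, 4 * hidden_size), so
# all gate pre-activations come out of a single matrix product instead of
# four. A minimal NumPy sketch of that layout (shapes are illustrative only):
import numpy as np

input_size, hidden_size, batch_size = 3, 2, 5
Wi, Wo, Wf, Wm = (np.random.rand(input_size, hidden_size) for _ in range(4))
W = np.concatenate([Wi, Wo, Wf, Wm], axis=1)  # shape: (3, 8)
x = np.random.rand(batch_size, input_size)
pre = x.dot(W)                                # one GEMM for all four gates
# Slicing the fused result recovers each gate's pre-activation exactly.
assert np.allclose(pre[:, :hidden_size], x.dot(Wi))
assert np.allclose(pre[:, 3 * hidden_size:], x.dot(Wm))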
def _build_trainer(nb_epochs, optimizer_cls):
    print("Will build a trainer that trains a Perceptron for {0} epochs.".format(nb_epochs))

    print("Building model")
    model = Perceptron(trainset.input_size, nb_classes)
    model.initialize(initer.UniformInitializer(random_seed=1234))

    print("Building optimizer")
    loss = NLL(model, trainset)
    optimizer = optimizer_cls(loss=loss)
    print("Optimizer: {}".format(type(optimizer).__name__))

    # Use mini-batches of 100 examples.
    batch_scheduler = MiniBatchScheduler(trainset, 100)

    print("Building trainer")
    trainer = Trainer(optimizer, batch_scheduler)

    # Print the duration of each epoch and of the whole training.
    trainer.append_task(tasks.PrintEpochDuration())
    trainer.append_task(tasks.PrintTrainingDuration())

    # Log the training loss.
    loss_monitor = views.MonitorVariable(loss.loss)
    avg_loss = tasks.AveragePerEpoch(loss_monitor)

    # Monitor the validset NLL mean and its standard error.
    nll = views.LossView(loss=NLL(model, validset),
                         batch_scheduler=FullBatchScheduler(validset))
    logger = tasks.Logger(loss_monitor, avg_loss, nll.mean)
    trainer.append_task(logger, avg_loss)

    # Train for `nb_epochs` epochs (stopping criteria should be appended last).
    trainer.append_task(stopping_criteria.MaxEpochStopping(nb_epochs))

    return trainer, nll, logger
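# Usage sketch for the Perceptron trainer builder (an assumption, not shown in
# this section: `trainset`, `validset` and `nb_classes` must be in scope, and
# `Trainer.train` is assumed to run until MaxEpochStopping fires):
#
#     trainer, nll, logger = _build_trainer(nb_epochs=10, optimizer_cls=SGD)
#     trainer.train()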
def test_fprop(self):
    activation = "tanh"
    seed = 1234
    repeat = 1000

    layer = LayerLSTM(input_size=DATA['features_size'],
                      hidden_size=DATA['hidden_size'],
                      activation=activation)
    layer.initialize(initer.UniformInitializer(seed))

    input = T.matrix('input')
    input.tag.test_value = DATA['batch_one_step']
    last_h = sharedX(DATA['state_h'])
    last_m = sharedX(DATA['state_m'])
    fprop = theano.function([input], layer.fprop(input, last_h, last_m))

    fprop_time = measure("h, m = fprop(DATA['batch_one_step'])", repeat)
    print("fprop time: {:.2f} sec.".format(fprop_time))
    h, m = fprop(DATA['batch_one_step'])
def initialize(self, weights_initializer=initer.UniformInitializer(1234)):
    # Note: the default initializer is instantiated once, at definition time,
    # so all calls relying on the default share the same instance.
    for layer in self.layers:
        layer.initialize(weights_initializer)
def initialize(self, weights_initializer=initer.UniformInitializer(random_seed=1234)):
    weights_initializer(self.W)
def initialize(self, weights_initializer=initer.UniformInitializer(1234)):
    super().initialize(weights_initializer)
    self.layer_regression.initialize(weights_initializer)
def initialize(self, weights_initializer=initer.UniformInitializer(1234)):
    super().initialize(weights_initializer)
    self.layer_regression.initialize(weights_initializer)
    if self.learn_to_stop:
        self.layer_stopping.initialize(weights_initializer)
def initialize(self, weights_initializer=initer.UniformInitializer(1234)):
    super().initialize(weights_initializer)
    self.layer_classification.initialize(weights_initializer)
def initialize(self, weights_initializer=initer.UniformInitializer(1234)):
    weights_initializer(self.W)
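# Example: overriding the default initializer on any of the models above
# (a sketch; `weights_initializer_factory` is the helper defined earlier and
# `model` stands in for an instance of one of these classes):
#
#     model.initialize(weights_initializer_factory("gaussian", seed=1234))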