# Shared imports for the test snippets below.
from collections import OrderedDict

import numpy
from numpy.testing import assert_allclose, assert_raises
from theano import tensor
from theano.compile.profiling import ProfileStats

from blocks.algorithms import GradientDescent
from blocks.utils import shared_floatx


def test_gradient_descent_finds_inputs_additional_updates():
    W = shared_floatx(numpy.array([[1, 2], [3, 4]]))
    n = shared_floatx(1)
    m = tensor.scalar('m')
    algorithm = GradientDescent(gradients=OrderedDict([(W, W + 1)]))
    algorithm.add_updates([(n, n + m)])
    algorithm.initialize()
    # The variable m, introduced only through the extra update, must be
    # picked up as an input of the compiled training function.
    assert m in algorithm.inputs
def test_gradient_descent():
    W = shared_floatx(numpy.array([[1, 2], [3, 4]]))
    W_start_value = W.get_value()
    cost = tensor.sum(W ** 2)

    algorithm = GradientDescent(cost=cost, parameters=[W])
    algorithm.step_rule.learning_rate.set_value(0.75)
    algorithm.initialize()
    algorithm.process_batch(dict())
    # One plain SGD step with learning rate 0.75 on cost sum(W**2):
    # W <- W - 0.75 * 2 * W = -0.5 * W.
    assert_allclose(W.get_value(), -0.5 * W_start_value)
def _test(f):
    # Helper: f transforms the gradients mapping before it is handed to
    # GradientDescent.
    W = shared_floatx(numpy.array([[1, 2], [3, 4]]))
    W_start_value = W.get_value()
    cost = tensor.sum(W ** 2)
    gradients = OrderedDict()
    gradients[W] = tensor.grad(cost, W)

    algorithm = GradientDescent(gradients=f(gradients))
    algorithm.step_rule.learning_rate.set_value(0.75)
    algorithm.initialize()
    algorithm.process_batch(dict())
    assert_allclose(W.get_value(), -0.5 * W_start_value)
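# Illustrative drivers for _test (not from the source): they assume
# GradientDescent accepts the gradients either as an OrderedDict or as a
# list of (parameter, gradient) pairs.
def test_gradient_descent_with_gradients_dict():
    _test(lambda gradients: gradients)


def test_gradient_descent_with_gradients_pairs():
    _test(lambda gradients: list(gradients.items()))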
def test_theano_profile_for_sgd_function():
    W = shared_floatx(numpy.array([[1, 2], [3, 4]]))
    W_start_value = W.get_value()
    cost = tensor.sum(W ** 2)

    algorithm = GradientDescent(
        cost=cost, parameters=[W], theano_func_kwargs={'profile': True})
    algorithm.step_rule.learning_rate.set_value(0.75)
    algorithm.initialize()
    algorithm.process_batch(dict())
    assert_allclose(W.get_value(), -0.5 * W_start_value)
    # Passing theano_func_kwargs through to theano.function attaches a
    # profiler to the compiled update function.
    assert isinstance(algorithm._function.profile, ProfileStats)
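# A minimal sketch (not part of the test above) of dumping the timings that
# the profiler collects after some batches have been processed, assuming
# Theano's ProfileStats.summary(file=...) interface.
import sys


def print_sgd_profile(algorithm):
    # Print the Theano profiling report attached to the compiled function.
    algorithm._function.profile.summary(file=sys.stdout)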
def test_gradient_descent_spurious_sources():
    W = shared_floatx(numpy.array([[1, 2], [3, 4]]))
    W_start_value = W.get_value()
    cost = tensor.sum(W ** 2)

    # By default, feeding a source that the cost does not use is an error.
    algorithm = GradientDescent(cost=cost, parameters=[W])
    algorithm.step_rule.learning_rate.set_value(0.75)
    algorithm.initialize()
    assert_raises(ValueError,
                  algorithm.process_batch, dict(example_id='test'))

    # With on_unused_sources='ignore' the unused source is silently dropped.
    algorithm = GradientDescent(cost=cost, parameters=[W],
                                on_unused_sources='ignore')
    algorithm.step_rule.learning_rate.set_value(0.75)
    algorithm.initialize()
    algorithm.process_batch(dict(example_id='test'))
    assert_allclose(W.get_value(), -0.5 * W_start_value)
def construct_main_loop(name, task_name, batch_size, max_epochs,
                        patience_epochs, learning_rate,
                        hyperparameters, **kwargs):
    # `tasks`, `construct_graphs`, `construct_monitors` and `theano` are
    # project-level modules/helpers assumed to be imported at module level.
    task = tasks.get_task(**hyperparameters)
    hyperparameters["n_channels"] = task.n_channels

    extensions = []

    print "constructing graphs..."
    graphs, outputs, updates = construct_graphs(task=task, **hyperparameters)

    print "setting up main loop..."

    from blocks.model import Model
    model = Model(outputs["train"]["cost"])

    from blocks.algorithms import (GradientDescent, CompositeRule,
                                   StepClipping, Adam)
    algorithm = GradientDescent(
        cost=outputs["train"]["cost"],
        parameters=graphs["train"].parameters,
        step_rule=CompositeRule([StepClipping(1e1),
                                 Adam(learning_rate=learning_rate),
                                 StepClipping(1e2)]),
        on_unused_sources="warn")
    algorithm.add_updates(updates["train"])

    extensions.extend(construct_monitors(
        algorithm=algorithm, task=task, model=model, graphs=graphs,
        outputs=outputs, **hyperparameters))

    from blocks.extensions import FinishAfter, Printing, ProgressBar, Timing
    from blocks.extensions.stopping import FinishIfNoImprovementAfter
    from blocks.extensions.training import TrackTheBest
    from blocks.extensions.saveload import Checkpoint
    from dump import DumpBest, LightCheckpoint, PrintingTo
    extensions.extend([
        TrackTheBest("valid_error_rate", "best_valid_error_rate"),
        FinishIfNoImprovementAfter("best_valid_error_rate",
                                   epochs=patience_epochs),
        FinishAfter(after_n_epochs=max_epochs),
        DumpBest("best_valid_error_rate", name + "_best.zip"),
        Checkpoint(hyperparameters["checkpoint_save_path"],
                   on_interrupt=False, every_n_epochs=5,
                   before_training=True, use_cpickle=True),
        ProgressBar(),
        Timing(),
        Printing(),
        PrintingTo(name + "_log")])

    from blocks.main_loop import MainLoop
    main_loop = MainLoop(data_stream=task.get_stream("train"),
                         algorithm=algorithm, extensions=extensions,
                         model=model)

    # note blocks will crash and burn because it cannot deal with an
    # already-initialized Algorithm, so this should be enabled only for
    # debugging
    if False:
        with open("graph", "w") as graphfile:
            algorithm.initialize()
            theano.printing.debugprint(algorithm._function, file=graphfile)

    from tabulate import tabulate
    print "parameter sizes:"
    print tabulate(
        (key, "x".join(map(str, value.get_value().shape)),
         value.get_value().size)
        for key, value in main_loop.model.get_parameter_dict().items())

    return main_loop
def construct_main_loop(name, task_name, batch_size, max_epochs,
                        patience_epochs, learning_rate,
                        hyperparameters, **kwargs):
    # A second variant of the function above, differing in the step rule
    # (Adam followed by StepClipping(1e3)) and in passing `updates` on to
    # the monitor constructor.
    task = tasks.get_task(**hyperparameters)
    hyperparameters["n_channels"] = task.n_channels

    extensions = []

    print "constructing graphs..."
    graphs, outputs, updates = construct_graphs(task=task, **hyperparameters)

    print "setting up main loop..."

    from blocks.model import Model
    model = Model(outputs["train"]["cost"])

    from blocks.algorithms import (GradientDescent, CompositeRule,
                                   StepClipping, Adam)
    algorithm = GradientDescent(
        cost=outputs["train"]["cost"],
        parameters=graphs["train"].parameters,
        step_rule=CompositeRule([Adam(learning_rate=learning_rate),
                                 StepClipping(1e3)]),
        on_unused_sources="warn")
    algorithm.add_updates(updates["train"])

    extensions.extend(construct_monitors(
        algorithm=algorithm, task=task, model=model, graphs=graphs,
        outputs=outputs, updates=updates, **hyperparameters))

    from blocks.extensions import FinishAfter, Printing, ProgressBar, Timing
    from blocks.extensions.stopping import FinishIfNoImprovementAfter
    from blocks.extensions.training import TrackTheBest
    from blocks.extensions.saveload import Checkpoint
    from dump import DumpBest, LightCheckpoint, PrintingTo
    extensions.extend([
        TrackTheBest("valid_error_rate", "best_valid_error_rate"),
        FinishIfNoImprovementAfter("best_valid_error_rate",
                                   epochs=patience_epochs),
        FinishAfter(after_n_epochs=max_epochs),
        DumpBest("best_valid_error_rate", name + "_best.zip"),
        Checkpoint(hyperparameters["checkpoint_save_path"],
                   on_interrupt=False, every_n_epochs=5,
                   before_training=True, use_cpickle=True),
        ProgressBar(),
        Timing(),
        Printing(),
        PrintingTo(name + "_log")])

    from blocks.main_loop import MainLoop
    main_loop = MainLoop(data_stream=task.get_stream("train"),
                         algorithm=algorithm, extensions=extensions,
                         model=model)

    # note blocks will crash and burn because it cannot deal with an
    # already-initialized Algorithm, so this should be enabled only for
    # debugging
    if False:
        with open("graph", "w") as graphfile:
            algorithm.initialize()
            theano.printing.debugprint(algorithm._function, file=graphfile)

    from tabulate import tabulate
    print "parameter sizes:"
    print tabulate(
        (key, "x".join(map(str, value.get_value().shape)),
         value.get_value().size)
        for key, value in main_loop.model.get_parameter_dict().items())

    return main_loop
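# Hypothetical driver (not from the source): the argument values below are
# placeholders, and `hyperparameters` is assumed to carry both the named
# arguments and any extra keys (e.g. checkpoint_save_path) that the task
# and monitor constructors expect; training is then started with
# MainLoop.run().
if __name__ == "__main__":
    hyperparameters = dict(
        name="experiment", task_name="mnist", batch_size=100,
        max_epochs=100, patience_epochs=10, learning_rate=1e-3,
        checkpoint_save_path="experiment_checkpoint.zip")
    main_loop = construct_main_loop(hyperparameters=hyperparameters,
                                    **hyperparameters)
    main_loop.run()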