def cost(self, x, y, n_patches):
    """Build the classification cost for a single input representation.

    ``self.mlp`` is applied to `x`; the result is scored against the
    flattened `y` with ``self.softmax.categorical_cross_entropy`` and
    averaged over axis 0.  The cross-entropy and the error rate are
    registered through ``self.add_auxiliary_variable``.

    `n_patches` is accepted but not used by this variant.

    Returns whatever ``util.named`` yields for the cross-entropy under the
    name "cost".
    """
    logits = self.mlp.apply(x)

    targets = y.flatten()
    xent = self.softmax.categorical_cross_entropy(targets, logits).mean(axis=0)

    # NOTE(review): `y` is flattened for the cross-entropy above but used
    # as-is here -- confirm `y` is already one-dimensional.
    predicted = logits.argmax(axis=1)
    misclassified = T.neq(y, predicted).mean(axis=0)

    # The naming call is kept ahead of the auxiliary registration, in the
    # same order as before, since either helper may assign variable names.
    named_cost = util.named(xent, "cost")
    self.add_auxiliary_variable(xent, name="cross_entropy")
    self.add_auxiliary_variable(misclassified, name="error_rate")
    return named_cost
def cost(self, cs, y, n_patches):
    """Build the classification cost from the last of `n_patches` steps.

    For each step ``t`` in ``xrange(n_patches)`` the slice ``cs[:, t, :]``
    is passed through ``self.mlp``; a cross-entropy and an error rate are
    built for every step, but only the final step's values are used for
    training and monitoring.

    Returns whatever ``util.named`` yields for the final step's
    cross-entropy under the name "cost".
    """
    # Three separate passes, so expressions are built in the same order as
    # a list-per-quantity formulation.
    step_logits = []
    for t in xrange(n_patches):
        step_logits.append(self.mlp.apply(cs[:, t, :]))

    step_xents = []
    for logits in step_logits:
        # NOTE(review): no mean over the batch is taken here -- confirm the
        # cross-entropy returned by the softmax brick is what the trainer
        # expects as a cost.
        step_xents.append(
            self.softmax.categorical_cross_entropy(y.flatten(), logits))

    step_errors = []
    for logits in step_logits:
        step_errors.append(T.neq(y, logits.argmax(axis=1)).mean(axis=0))

    final_xent = step_xents[-1]
    final_error = step_errors[-1]

    # Train on the final prediction.
    named_cost = util.named(final_xent, "cost")

    # Monitor the final prediction.
    self.add_auxiliary_variable(final_xent, name="cross_entropy")
    self.add_auxiliary_variable(final_error, name="error_rate")
    return named_cost
def cost(self, cs, y, n_patches):
    """Return the named cost computed from the final per-step prediction.

    Every step ``t`` of `cs` (indexed as ``cs[:, t, :]``) is classified by
    ``self.mlp``.  Cross-entropies and error rates are constructed for all
    steps; the last of each is registered as an auxiliary variable, and the
    last cross-entropy is returned through ``util.named`` as "cost".
    """
    def classify(t):
        # Energies for step `t`.
        return self.mlp.apply(cs[:, t, :])

    def cross_entropy_of(energy):
        return self.softmax.categorical_cross_entropy(y.flatten(), energy)

    def error_rate_of(energy):
        return T.neq(y, energy.argmax(axis=1)).mean(axis=0)

    energies = list(map(classify, xrange(n_patches)))
    cross_entropies = list(map(cross_entropy_of, energies))
    error_rates = list(map(error_rate_of, energies))

    # Only the final step drives training ...
    result = util.named(cross_entropies[-1], "cost")

    # ... and only the final step is monitored.
    self.add_auxiliary_variable(cross_entropies[-1], name="cross_entropy")
    self.add_auxiliary_variable(error_rates[-1], name="error_rate")
    return result
def construct_monitors(algorithm, task, n_patches, x, x_shape, graph, name,
                       ram, model, cost, n_spatial_dims, plot_url,
                       patchmonitor_interval=100, **kwargs):
    """Assemble the list of monitoring extensions for a training run.

    Parameters
    ----------
    algorithm
        Provides ``total_gradient_norm`` and the ``steps`` mapping used for
        the per-parameter step-norm channels.
    task
        Provides ``monitor_channels(graph)``, ``get_stream(...)`` and
        ``plot_channels()``.
    n_patches
        Number of patches; used to fetch the recurrent auxiliaries and to
        index ``location``/``scale`` along axis 1.
    x, x_shape
        Symbolic inputs of the compiled patch extractor.
    graph
        Computation graph the auxiliaries and parameters are read from.
    name
        Experiment name; used in patch file names and as the plot name.
    ram
        Object whose ``crop`` builds the symbolic patch.
    model
        Provides ``get_parameter_dict()``.
    cost
        Cost variable monitored alongside the other channels.
    n_spatial_dims
        2 selects ``PatchMonitoring``, 3 selects ``VideoPatchMonitoring``;
        any other value disables patch monitoring.
    plot_url
        If truthy, a ``Plot`` extension pointing at this server is added.
    patchmonitor_interval
        Passed as ``every_n_batches`` to the patch monitors.
    **kwargs
        Ignored.

    Returns
    -------
    list
        The constructed extensions.
    """
    location, scale, savings = util.get_recurrent_auxiliaries(
        "location scale savings".split(), graph, n_patches)

    channels = util.Channels()
    channels.extend(task.monitor_channels(graph))
    channels.append(util.named(savings.mean(), "savings.mean"))

    # Explicit pairs instead of a `locals()` lookup by name.
    for variable_name, variable in (("location", location), ("scale", scale)):
        channels.append(variable.mean(axis=0), "%s.mean" % variable_name)
        channels.append(variable.var(axis=0), "%s.variance" % variable_name)

    channels.append(algorithm.total_gradient_norm, "total_gradient_norm")

    # `param_name` (rather than `name`) so the experiment name passed as a
    # parameter is never shadowed.
    step_norms = util.Channels()
    step_norms.extend(
        util.named(l2_norm([algorithm.steps[param]]),
                   "%s.step_norm" % param_name)
        for param_name, param in model.get_parameter_dict().items())
    step_channels = step_norms.get_channels()

    # Means of parameters named "gamma"/"beta"; monitored without a data
    # stream below.
    data_independent_channels = util.Channels()
    for parameter in graph.parameters:
        if parameter.name in ("gamma", "beta"):
            quantity = parameter.mean()
            quantity.name = "%s.mean" % util.get_path(parameter)
            data_independent_channels.append(quantity)

    extensions = []
    extensions.append(TrainingDataMonitoring(
        step_channels, prefix="train", after_epoch=True))
    extensions.append(DataStreamMonitoring(
        data_independent_channels.get_channels(),
        data_stream=None, after_epoch=True))
    extensions.extend(
        DataStreamMonitoring(
            (channels.get_channels() + [cost]),
            data_stream=task.get_stream(which, monitor=True),
            prefix=which, after_epoch=True)
        for which in "train valid test".split())

    # Must start out as None: without this, an unsupported `n_spatial_dims`
    # would leave the name unbound and the check below would raise.
    patchmonitor_klass = None
    if n_spatial_dims == 2:
        patchmonitor_klass = PatchMonitoring
    elif n_spatial_dims == 3:
        patchmonitor_klass = VideoPatchMonitoring

    if patchmonitor_klass:
        patch = T.stack(*[
            ram.crop(x, x_shape, location[:, i, :], scale[:, i, :])
            for i in xrange(n_patches)])
        # Swap the first two axes of the stacked patches.
        patch = patch.dimshuffle(1, 0, *range(2, patch.ndim))
        patch_extractor = theano.function([x, x_shape],
                                          [location, scale, patch])

        for which in "train valid".split():
            patchmonitor = patchmonitor_klass(
                save_to="%s_patches_%s" % (name, which),
                data_stream=task.get_stream(which, shuffle=False,
                                            num_examples=5),
                every_n_batches=patchmonitor_interval,
                extractor=patch_extractor,
                map_to_input_space=attention.static_map_to_input_space)
            patchmonitor.save_patches("patchmonitor_test.png")
            extensions.append(patchmonitor)

    if plot_url:
        plot_channels = []
        plot_channels.extend(task.plot_channels())
        plot_channels.append(["train_cost"])

        # Imported lazily so the plotting extras are only needed when a
        # plot server is configured.
        from blocks.extras.extensions.plot import Plot
        extensions.append(Plot(name, channels=plot_channels,
                               after_epoch=True, server_url=plot_url))

    return extensions
def construct_monitors(
    algorithm, task, n_patches, x, x_uncentered, hs, graph, plot_url, name,
    ram, model, cost, n_spatial_dims, patchmonitor_interval=100, **kwargs
):
    """Assemble the list of monitoring extensions for a training run.

    Parameters
    ----------
    algorithm, x, model
        Accepted for interface compatibility; not used by this variant.
    task
        Provides ``monitor_channels(graph)``, ``get_stream(...)`` and
        ``plot_channels()``.
    n_patches
        Number of patches; used to fetch the recurrent auxiliaries and to
        index ``hs``/``location``/``scale`` along axis 1.
    x_uncentered
        Symbolic input the patches are cropped from and the sole input of
        the compiled patch extractor.
    hs
        Variable whose per-step slices ``hs[:, i]`` get a mean channel.
    graph
        Computation graph the auxiliaries and output activations are read
        from.
    plot_url
        Server URL handed to the ``Plot`` extension.
    name
        Name handed to the ``Plot`` extension.
    ram
        Object whose ``attention.crop`` builds the symbolic patch.
    cost
        Cost variable monitored alongside the other channels.
    n_spatial_dims
        2 selects ``PatchMonitoring``, 3 selects ``VideoPatchMonitoring``;
        any other value disables patch monitoring.
    patchmonitor_interval
        Passed as ``every_n_batches`` to the patch monitor.
    **kwargs
        Ignored.

    Returns
    -------
    list
        The constructed extensions.
    """
    location, scale, savings = util.get_recurrent_auxiliaries(
        "location scale savings".split(), graph, n_patches)

    channels = util.Channels()
    channels.extend(task.monitor_channels(graph))

    for i in xrange(n_patches):
        channels.append(hs[:, i].mean(), "h%i.mean" % i)

    channels.append(util.named(savings.mean(), "savings.mean"))

    # Explicit pairs instead of a `locals()` lookup by name.
    for variable_name, variable in (("location", location), ("scale", scale)):
        channels.append(variable.var(axis=0).mean(),
                        "%s.batch_variance" % variable_name)
        channels.append(variable.var(axis=1).mean(),
                        "%s.time_variance" % variable_name)

    # NOTE: per-parameter step-norm monitoring is disabled in this variant.

    # Mean of every variable in the graph carrying the OUTPUT role.
    for activation in VariableFilter(roles=[OUTPUT])(graph.variables):
        quantity = activation.mean()
        quantity.name = "%s.mean" % util.get_path(activation)
        channels.append(quantity)

    extensions = []
    extensions.extend(
        DataStreamMonitoring(
            (channels.get_channels() + [cost]),
            data_stream=task.get_stream(which),
            prefix=which, after_epoch=True)
        for which in "train valid test".split())

    # Must start out as None: without this, an unsupported `n_spatial_dims`
    # would leave the name unbound and the check below would raise.
    patchmonitor_klass = None
    if n_spatial_dims == 2:
        patchmonitor_klass = PatchMonitoring
    elif n_spatial_dims == 3:
        patchmonitor_klass = VideoPatchMonitoring

    if patchmonitor_klass:
        # Get patches from the original (uncentered) images.
        patch = T.stack(*[
            ram.attention.crop(x_uncentered,
                               location[:, i, :], scale[:, i, :])
            for i in xrange(n_patches)])
        # Swap the first two axes of the stacked patches.
        patch = patch.dimshuffle(1, 0, *range(2, patch.ndim))

        patchmonitor = patchmonitor_klass(
            task.get_stream("valid", SequentialScheme(5, 5)),
            every_n_batches=patchmonitor_interval,
            extractor=theano.function([x_uncentered],
                                      [location, scale, patch]),
            map_to_input_space=masonry.static_map_to_input_space)
        patchmonitor.save_patches("test.png")
        extensions.append(patchmonitor)

    plot_channels = []
    plot_channels.extend(task.plot_channels())
    plot_channels.append(["train_cost"])
    extensions.append(Plot(name, channels=plot_channels,
                           after_epoch=True, server_url=plot_url))

    return extensions
def construct_monitors(algorithm, task, n_patches, x, x_uncentered, hs,
                       graph, plot_url, name, ram, model, cost,
                       n_spatial_dims, patchmonitor_interval=100, **kwargs):
    """Assemble the list of monitoring extensions for a training run.

    Parameters
    ----------
    algorithm, x, hs, model
        Accepted for interface compatibility; not used by this variant.
    task
        Provides ``monitor_channels(graph)``, ``get_stream(...)`` and
        ``plot_channels()``.
    n_patches
        Number of patches; used to fetch the recurrent auxiliaries and to
        index ``location``/``scale`` along axis 1.
    x_uncentered
        Symbolic input the patches are cropped from and the sole input of
        the compiled patch extractor.
    graph
        Computation graph the auxiliaries and parameters are read from.
    plot_url
        Server URL handed to the ``Plot`` extension.
    name
        Name handed to the ``Plot`` extension.
    ram
        Object whose ``attention.crop`` builds the symbolic patch.
    cost
        Cost variable monitored alongside the other channels.
    n_spatial_dims
        2 selects ``PatchMonitoring``, 3 selects ``VideoPatchMonitoring``;
        any other value disables patch monitoring.
    patchmonitor_interval
        Passed as ``every_n_batches`` to the patch monitor.
    **kwargs
        Ignored.

    Returns
    -------
    list
        The constructed extensions.
    """
    location, scale, savings = util.get_recurrent_auxiliaries(
        "location scale savings".split(), graph, n_patches)

    channels = util.Channels()
    channels.extend(task.monitor_channels(graph))
    channels.append(util.named(savings.mean(), "savings.mean"))

    # Explicit pairs instead of a `locals()` lookup by name.
    for variable_name, variable in (("location", location), ("scale", scale)):
        channels.append(variable.var(axis=0).mean(),
                        "%s.batch_variance" % variable_name)
        channels.append(variable.var(axis=1).mean(),
                        "%s.time_variance" % variable_name)

    # NOTE: per-step `hs` means, per-parameter step norms and per-activation
    # means are not monitored in this variant.

    # Means of parameters named "gamma"/"beta".
    for parameter in graph.parameters:
        if parameter.name in ("gamma", "beta"):
            quantity = parameter.mean()
            quantity.name = "%s.mean" % util.get_path(parameter)
            channels.append(quantity)

    extensions = []
    extensions.extend(
        DataStreamMonitoring(
            (channels.get_channels() + [cost]),
            data_stream=task.get_stream(which),
            prefix=which, after_epoch=True)
        for which in "train valid test".split())

    # Must start out as None: without this, an unsupported `n_spatial_dims`
    # would leave the name unbound and the check below would raise.
    patchmonitor_klass = None
    if n_spatial_dims == 2:
        patchmonitor_klass = PatchMonitoring
    elif n_spatial_dims == 3:
        patchmonitor_klass = VideoPatchMonitoring

    if patchmonitor_klass:
        # Get patches from the original (uncentered) images.
        patch = T.stack(*[
            ram.attention.crop(x_uncentered,
                               location[:, i, :], scale[:, i, :])
            for i in xrange(n_patches)])
        # Swap the first two axes of the stacked patches.
        patch = patch.dimshuffle(1, 0, *range(2, patch.ndim))

        patchmonitor = patchmonitor_klass(
            task.get_stream("valid", SequentialScheme(5, 5)),
            every_n_batches=patchmonitor_interval,
            extractor=theano.function([x_uncentered],
                                      [location, scale, patch]),
            map_to_input_space=masonry.static_map_to_input_space)
        patchmonitor.save_patches("test.png")
        extensions.append(patchmonitor)

    plot_channels = []
    plot_channels.extend(task.plot_channels())
    plot_channels.append(["train_cost"])
    extensions.append(
        Plot(name, channels=plot_channels,
             after_epoch=True, server_url=plot_url))

    return extensions
def construct_monitors(algorithm, task, n_patches, x, x_shape, graph, name,
                       ram, model, cost, n_spatial_dims, plot_url,
                       patchmonitor_interval=100, **kwargs):
    """Build the monitoring extensions attached to the training loop.

    Parameters
    ----------
    algorithm
        Source of ``total_gradient_norm`` and of the ``steps`` mapping from
        which per-parameter step norms are derived.
    task
        Source of the task channels (``monitor_channels``), the data
        streams (``get_stream``) and the plot layout (``plot_channels``).
    n_patches
        Number of patches; passed to ``util.get_recurrent_auxiliaries`` and
        used to index ``location``/``scale`` along axis 1.
    x, x_shape
        Symbolic inputs of the compiled patch extractor.
    graph
        Computation graph providing auxiliaries and parameters.
    name
        Experiment name, used in patch file names and as the plot name.
    ram
        Object whose ``crop`` produces the symbolic patch.
    model
        Source of the parameter dictionary (``get_parameter_dict``).
    cost
        Cost variable, monitored together with the other channels.
    n_spatial_dims
        2 selects ``PatchMonitoring`` and 3 ``VideoPatchMonitoring``; with
        any other value no patch monitor is created.
    plot_url
        When truthy, a ``Plot`` extension targeting this server is added.
    patchmonitor_interval
        Forwarded to the patch monitors as ``every_n_batches``.
    **kwargs
        Ignored.

    Returns
    -------
    list
        The extensions, in the order they were constructed.
    """
    location, scale, savings = util.get_recurrent_auxiliaries(
        "location scale savings".split(), graph, n_patches)

    channels = util.Channels()
    channels.extend(task.monitor_channels(graph))
    channels.append(util.named(savings.mean(), "savings.mean"))

    # Pair each label with its variable directly; this avoids resolving
    # local variables by name through `locals()`.
    monitored = (("location", location), ("scale", scale))
    for variable_name, variable in monitored:
        channels.append(variable.mean(axis=0), "%s.mean" % variable_name)
        channels.append(variable.var(axis=0), "%s.variance" % variable_name)

    channels.append(algorithm.total_gradient_norm, "total_gradient_norm")

    # The loop variable is deliberately not called `name`, which is the
    # experiment name used further down.
    step_norms = util.Channels()
    step_norms.extend(
        util.named(l2_norm([algorithm.steps[param]]),
                   "%s.step_norm" % param_name)
        for param_name, param in model.get_parameter_dict().items())
    step_channels = step_norms.get_channels()

    # Parameters named "gamma" or "beta" get a mean channel that is
    # monitored without any data stream.
    data_independent_channels = util.Channels()
    for parameter in graph.parameters:
        if parameter.name in ("gamma", "beta"):
            quantity = parameter.mean()
            quantity.name = "%s.mean" % util.get_path(parameter)
            data_independent_channels.append(quantity)

    extensions = [
        TrainingDataMonitoring(step_channels,
                               prefix="train", after_epoch=True),
        DataStreamMonitoring(data_independent_channels.get_channels(),
                             data_stream=None, after_epoch=True),
    ]
    extensions.extend(
        DataStreamMonitoring(
            (channels.get_channels() + [cost]),
            data_stream=task.get_stream(which, monitor=True),
            prefix=which, after_epoch=True)
        for which in "train valid test".split())

    # Default to "no patch monitor" so that an unsupported `n_spatial_dims`
    # skips patch monitoring instead of hitting an unbound local below.
    patchmonitor_klass = None
    if n_spatial_dims == 2:
        patchmonitor_klass = PatchMonitoring
    elif n_spatial_dims == 3:
        patchmonitor_klass = VideoPatchMonitoring

    if patchmonitor_klass:
        patch = T.stack(*[
            ram.crop(x, x_shape, location[:, i, :], scale[:, i, :])
            for i in xrange(n_patches)
        ])
        # Swap the first two axes of the stacked patches.
        patch = patch.dimshuffle(1, 0, *range(2, patch.ndim))
        patch_extractor = theano.function([x, x_shape],
                                          [location, scale, patch])

        for which in "train valid".split():
            patchmonitor = patchmonitor_klass(
                save_to="%s_patches_%s" % (name, which),
                data_stream=task.get_stream(which, shuffle=False,
                                            num_examples=5),
                every_n_batches=patchmonitor_interval,
                extractor=patch_extractor,
                map_to_input_space=attention.static_map_to_input_space)
            patchmonitor.save_patches("patchmonitor_test.png")
            extensions.append(patchmonitor)

    if plot_url:
        plot_channels = []
        plot_channels.extend(task.plot_channels())
        plot_channels.append(["train_cost"])

        # Local import: the plotting extras are only required when a plot
        # server has been configured.
        from blocks.extras.extensions.plot import Plot
        extensions.append(
            Plot(name, channels=plot_channels,
                 after_epoch=True, server_url=plot_url))

    return extensions