def get_updates(variables):
    # this is fugly because we must get the batch stats from the
    # graph so we get the ones that are *actually being used in
    # the computation* after graph transforms have been applied
    from blocks.roles import has_roles

    updates = []
    variables = graph.deep_ancestors(variables)
    for stat, role in BatchNormalization.roles.items():
        batch_stats = [var for var in variables if has_roles(var, [role])]
        batch_stats = util.dedup(batch_stats, equal=util.equal_computations)

        batch_stats_by_brick = OrderedDict()
        for batch_stat in batch_stats:
            brick = batch_stat.tag.batch_normalization_brick
            batch_stats_by_brick.setdefault(brick, []).append(batch_stat)

        for brick, batch_stats in batch_stats_by_brick.items():
            population_stat = brick.population_stats[stat]
            if len(batch_stats) > 1:
                # multiple estimates per brick make sense for
                # recurrent structures; average them
                logger.warning(
                    "averaging multiple population statistic estimates "
                    "to update %s: %s",
                    util.get_path(population_stat), batch_stats)
            batch_stat = T.stack(batch_stats).mean(axis=0)
            updates.append((population_stat,
                            (1 - brick.alpha) * population_stat
                            + brick.alpha * batch_stat))
    return updates
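# A minimal usage sketch, assuming the surrounding Theano setup: the
# pairs returned by get_updates are handed to theano.function (or to the
# training algorithm's updates) so the population statistics track the
# batch statistics during training. `cost`, `params`, and `inputs`
# below are hypothetical names, not from this codebase.
import theano

pop_updates = get_updates([cost])
sgd_updates = [(p, p - 0.01 * theano.grad(cost, p)) for p in params]
train_fn = theano.function(inputs, cost, updates=sgd_updates + pop_updates)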
def lq(self, thresholds, logger, title):
    stc, sco, stw, sws = self.sum()
    if stc == 0:
        return (float('NaN'), 0, 0, [])
    # base rates
    brc = float(stc) / sco
    brw = float(stw) / sws
    if thresholds is None:
        thresholds = [brc] if brc == brw else [brc, brw]
    # convert thresholds to whole numbers of bins
    cumco = util.cumsum([co for _tc, co, _tw, _ws in self.bins])
    top = len(self.bins) - 2  # max bin index for thresholds
    thresholds = [min(top, bisect.bisect_left(cumco, th * sco))
                  for th in thresholds]
    thresholds = util.dedup(thresholds)
    # http://en.wikipedia.org/wiki/Trapezoidal_rule
    # ctw is the cumulative target weight up to and including this bin
    # (renamed from stw to avoid shadowing the total above)
    cph = sum((ctw - tw / 2) * ws
              for ctw, (tw, ws) in
              zip(util.cumsum([tw for _tc, _co, tw, _ws in self.bins]),
                  [(tw, ws) for _tc, _co, tw, ws in self.bins]))
    cph = float(cph) / (sws * stw)
    logger.info("%s: cph=%g targetLevel=%g totalWeight=%g",
                title, cph, stw, sws)
    lq = (2 * cph - 1) / (1 - brw)
    mxl = []
    for threshold in thresholds:
        mx = ConfusionMX("{h:s} at {d:.2%}".format(
            h=title, d=float(cumco[threshold]) / sco))
        mxl.append(mx)
        for i, (_tc, _co, tw, ws) in enumerate(self.bins):
            if tw > 0:
                mx.add(True, i <= threshold, tw)
            if ws > tw:
                mx.add(False, i <= threshold, ws - tw)
    return (lq, brc, brw, mxl)
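# To make the trapezoidal-rule step concrete, a self-contained
# recomputation of cph over toy bins. Bin tuples follow the
# (target_count, count, target_weight, weight_sum) layout unpacked
# above; itertools.accumulate stands in for util.cumsum. The toy
# numbers are illustrative only.
from itertools import accumulate

bins = [(3, 4, 3.0, 4.0), (1, 4, 1.0, 4.0), (0, 4, 0.0, 4.0)]
stw = sum(tw for _tc, _co, tw, _ws in bins)  # total target weight
sws = sum(ws for _tc, _co, _tw, ws in bins)  # total weight

# each bin contributes its weight times the cumulative target weight
# evaluated at the bin's midpoint (ctw - tw / 2)
cph = sum((ctw - tw / 2) * ws
          for ctw, (_tc, _co, tw, ws) in
          zip(accumulate(tw for _tc, _co, tw, _ws in bins), bins))
cph /= sws * stw
print(cph)  # 0.75: most of the target weight sits in the early bins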
def tag_convnet_dropout(outputs, rng=None, **kwargs):
    from blocks.roles import has_roles, OUTPUT
    cnn_outputs = OrderedDict()
    for var in theano.gof.graph.ancestors(outputs):
        if (has_roles(var, [OUTPUT]) and util.annotated_by_a(
                util.get_convolution_classes(), var)):
            cnn_outputs.setdefault(util.get_path(var), []).append(var)
    unique_outputs = []
    for path, vars in cnn_outputs.items():
        vars = util.dedup(vars, equal=util.equal_computations)
        unique_outputs.extend(vars)
    graph.add_transform(
        unique_outputs,
        graph.DropoutTransform("convnet_dropout", rng=rng),
        reason="regularization")
def tag_convnet_dropout(outputs, rng=None, **kwargs):
    from blocks.roles import has_roles, OUTPUT
    cnn_outputs = OrderedDict()
    for var in theano.gof.graph.ancestors(outputs):
        if (has_roles(var, [OUTPUT]) and util.annotated_by_a(
                util.get_convolution_classes(), var)):
            cnn_outputs.setdefault(util.get_path(var), []).append(var)
    unique_outputs = []
    for path, vars in cnn_outputs.items():
        vars = util.dedup(vars, equal=util.equal_computations)
        # unlike the variant above, which keeps all deduplicated
        # outputs, util.the presumably insists on exactly one distinct
        # computation per path
        unique_outputs.append(util.the(vars))
    graph.add_transform(
        unique_outputs,
        graph.DropoutTransform("convnet_dropout", rng=rng),
        reason="regularization")
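# A hedged usage sketch for either variant above: tag the convolutional
# outputs before the dropout transform is applied to the graph. The
# model_outputs name and the RandomStreams choice are assumptions; the
# RNG type ultimately depends on what graph.DropoutTransform expects.
from theano.sandbox.rng_mrg import MRG_RandomStreams

rng = MRG_RandomStreams(seed=1)
tag_convnet_dropout(model_outputs, rng=rng)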
def construct_monitors(algorithm, task, model, graphs, outputs, updates,
                       monitor_options, n_spatial_dims, plot_url,
                       hyperparameters, patchmonitor_interval, **kwargs):
    from blocks.extensions.monitoring import (
        TrainingDataMonitoring, DataStreamMonitoring)

    extensions = []

    if "steps" in monitor_options:
        step_channels = []
        step_channels.extend([
            algorithm.steps[param].norm(2).copy(name="step_norm:%s" % name)
            for name, param in model.get_parameter_dict().items()])
        step_channels.append(
            algorithm.total_step_norm.copy(name="total_step_norm"))
        step_channels.append(
            algorithm.total_gradient_norm.copy(name="total_gradient_norm"))

        from extensions import Compressor
        for step_rule in algorithm.step_rule.components:
            if isinstance(step_rule, Compressor):
                step_channels.append(
                    step_rule.norm.copy(name="compressor.norm"))
                step_channels.append(
                    step_rule.newnorm.copy(name="compressor.newnorm"))
                step_channels.append(
                    step_rule.median.copy(name="compressor.median"))
                step_channels.append(
                    step_rule.ratio.copy(name="compressor.ratio"))

        step_channels.extend(
            outputs["train"][key] for key in
            "cost emitter_cost excursion_cost cross_entropy error_rate"
            .split())
        step_channels.extend(util.uniqueify_names_last_resort(util.dedup(
            (var.mean().copy(name="bn_stat:%s" % util.get_path(var))
             for var in graph.deep_ancestors([outputs["train"]["cost"]])
             if hasattr(var.tag, "batch_normalization_brick")),
            equal=util.equal_computations)))

        logger.warning("constructing training data monitor")
        extensions.append(TrainingDataMonitoring(
            step_channels, prefix="iteration", after_batch=True))

    if "parameters" in monitor_options:
        data_independent_channels = []
        for parameter in graphs["train"].parameters:
            if parameter.name in "gamma beta W b".split():
                quantity = parameter.norm(2)
                quantity.name = "parameter.norm:%s" % util.get_path(parameter)
                data_independent_channels.append(quantity)
        for key in "location_std scale_std".split():
            data_independent_channels.append(
                hyperparameters[key].copy(name="parameter:%s" % key))
        extensions.append(DataStreamMonitoring(
            data_independent_channels, data_stream=None, after_epoch=True))

    for which_set in "train valid test".split():
        channels = []
        channels.extend(outputs[which_set][key] for key in
                        "cost emitter_cost excursion_cost".split())
        channels.extend(outputs[which_set][key]
                        for key in task.monitor_outputs())
        channels.append(outputs[which_set]["savings"]
                        .mean().copy(name="mean_savings"))
        if "theta" in monitor_options:
            for key in "true_scale raw_location raw_scale".split():
                for stat in "mean var".split():
                    channels.append(
                        getattr(outputs[which_set][key], stat)(axis=1)
                        .copy(name="%s.%s" % (key, stat)))
        if which_set == "train":
            if "activations" in monitor_options:
                from blocks.roles import has_roles, OUTPUT
                cnn_outputs = OrderedDict()
                for var in theano.gof.graph.ancestors(
                        graphs[which_set].outputs):
                    if (has_roles(var, [OUTPUT]) and util.annotated_by_a(
                            util.get_convolution_classes(), var)):
                        cnn_outputs.setdefault(
                            util.get_path(var), []).append(var)
                for path, vars in cnn_outputs.items():
                    vars = util.dedup(vars, equal=util.equal_computations)
                    for i, var in enumerate(vars):
                        channels.append(var.mean().copy(
                            name="activation[%i].mean:%s" % (i, path)))
            if "batch_normalization" in monitor_options:
                errors = []
                for population_stat, update in updates[which_set]:
                    if population_stat.name.startswith("population"):
                        # dig the batch statistic out of the exponential
                        # moving average expression; this relies on the
                        # update having the exact form
                        # (1 - alpha) * population + alpha * batch
                        batch_stat = update.owner.inputs[1].owner.inputs[1]
                        errors.append(
                            ((population_stat - batch_stat) ** 2).mean())
                if errors:
                    channels.append(T.stack(errors).mean().copy(
                        name="population_statistic_mse"))
        logger.warning("constructing %s monitor" % which_set)
        extensions.append(DataStreamMonitoring(
            channels, prefix=which_set, after_epoch=True,
            data_stream=task.get_stream(which_set, monitor=True)))

    if "patches" in monitor_options:
        from patchmonitor import PatchMonitoring, VideoPatchMonitoring

        patchmonitor_klass = None
        if n_spatial_dims == 2:
            patchmonitor_klass = PatchMonitoring
        elif n_spatial_dims == 3:
            patchmonitor_klass = VideoPatchMonitoring
        if patchmonitor_klass:
            for which in "train valid".split():
                patch = outputs[which]["patch"]
                patch = patch.dimshuffle(1, 0, *range(2, patch.ndim))
                patch_extractor = theano.function(
                    [outputs[which][key] for key in "x x_shape".split()],
                    [outputs[which][key] for key in
                     "raw_location raw_scale".split()] + [patch])
                patchmonitor = patchmonitor_klass(
                    save_to="%s_patches_%s" % (hyperparameters["name"],
                                               which),
                    data_stream=task.get_stream(which, shuffle=False,
                                                num_examples=10),
                    every_n_batches=patchmonitor_interval,
                    extractor=patch_extractor,
                    map_to_input_space=attention.static_map_to_input_space)
                patchmonitor.save_patches("patchmonitor_test.png")
                extensions.append(patchmonitor)

    if plot_url:
        plot_channels = []
        plot_channels.extend(task.plot_channels())
        plot_channels.append(["train_cost"])
        #plot_channels.append(["train_%s" % step_channel.name
        #                      for step_channel in step_channels])
        from blocks.extras.extensions.plot import Plot
        extensions.append(Plot(hyperparameters["name"],
                               channels=plot_channels,
                               after_epoch=True, server_url=plot_url))

    return extensions
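# The inputs[1].owner.inputs[1] walk above depends on the exact shape of
# the moving-average expression built by get_updates: the update is
# (1 - alpha) * population + alpha * batch, so the root op is an add
# whose second input is the alpha * batch product, whose second input is
# the batch statistic itself. A minimal self-contained sketch of that
# walk (variable names here are illustrative, not from the codebase):
import theano.tensor as T

population = T.vector("population")
batch = T.vector("batch")
alpha = 0.05

update = (1 - alpha) * population + alpha * batch
# update.owner is add(mul(1 - alpha, population), mul(alpha, batch))
recovered = update.owner.inputs[1].owner.inputs[1]
assert recovered is batch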
def construct_monitors(algorithm, task, model, graphs, outputs,
                       plot_url, hyperparameters, **kwargs):
    from blocks.extensions.monitoring import (
        TrainingDataMonitoring, DataStreamMonitoring)

    extensions = []

    extensions.append(TrainingDataMonitoring(
        [algorithm.steps[param].norm(2).copy(name="step_norm:%s" % name)
         for name, param in model.get_parameter_dict().items()],
        prefix="train", after_epoch=True))

    data_independent_channels = []
    for parameter in graphs["train"].parameters:
        if parameter.name in "gamma beta W b".split():
            quantity = parameter.norm(2)
            quantity.name = "parameter.norm:%s" % util.get_path(parameter)
            data_independent_channels.append(quantity)
    extensions.append(DataStreamMonitoring(
        data_independent_channels, data_stream=None, after_epoch=True))

    for which_set in "train valid test".split():
        channels = []
        channels.extend(outputs[which_set][key] for key in "cost".split())
        channels.extend(outputs[which_set][key]
                        for key in task.monitor_outputs())
        if which_set == "train":
            from blocks.roles import has_roles, OUTPUT
            cnn_outputs = OrderedDict()
            for var in theano.gof.graph.ancestors(graphs[which_set].outputs):
                if (has_roles(var, [OUTPUT]) and util.annotated_by_a(
                        util.get_convolution_classes(), var)):
                    cnn_outputs.setdefault(util.get_path(var), []).append(var)
            for path, vars in cnn_outputs.items():
                vars = util.dedup(vars, equal=util.equal_computations)
                for i, var in enumerate(vars):
                    channels.append(var.mean().copy(
                        name="activation[%i].mean:%s" % (i, path)))
            channels.append(algorithm.total_gradient_norm
                            .copy(name="total_gradient_norm"))
        extensions.append(DataStreamMonitoring(
            channels, prefix=which_set, after_epoch=True,
            data_stream=task.get_stream(which_set, monitor=True)))

    if plot_url:
        plot_channels = []
        plot_channels.extend(task.plot_channels())
        plot_channels.append(["train_cost"])
        #plot_channels.append(["train_%s" % step_channel.name
        #                      for step_channel in step_channels])
        from blocks.extras.extensions.plot import Plot
        extensions.append(Plot(hyperparameters["name"],
                               channels=plot_channels,
                               after_epoch=True, server_url=plot_url))

    return extensions
def construct_monitors(algorithm, task, model, graphs, outputs, updates,
                       monitor_options, n_spatial_dims, hyperparameters,
                       **kwargs):
    from blocks.extensions.monitoring import (
        TrainingDataMonitoring, DataStreamMonitoring)

    extensions = []

    if "steps" in monitor_options:
        step_channels = []
        step_channels.extend([
            algorithm.steps[param].norm(2).copy(name="step_norm:%s" % name)
            for name, param in model.get_parameter_dict().items()])
        step_channels.append(
            algorithm.total_step_norm.copy(name="total_step_norm"))
        step_channels.append(
            algorithm.total_gradient_norm.copy(name="total_gradient_norm"))
        logger.warning("constructing training data monitor")
        extensions.append(TrainingDataMonitoring(
            step_channels, prefix="train", after_epoch=True))

    if "parameters" in monitor_options:
        data_independent_channels = []
        for parameter in graphs["train"].parameters:
            if parameter.name in "gamma beta W b".split():
                quantity = parameter.norm(2)
                quantity.name = "parameter.norm:%s" % util.get_path(parameter)
                data_independent_channels.append(quantity)
        for key in "location_std scale_std".split():
            data_independent_channels.append(
                hyperparameters[key].copy(name="parameter:%s" % key))
        extensions.append(DataStreamMonitoring(
            data_independent_channels, data_stream=None, after_epoch=True))

    for which_set in "train test".split():
        channels = []
        channels.extend(outputs[which_set][key] for key in
                        "cost emitter_cost excursion_cost".split())
        channels.extend(outputs[which_set][key]
                        for key in task.monitor_outputs())
        channels.append(outputs[which_set]["savings"]
                        .mean().copy(name="mean_savings"))
        if "theta" in monitor_options:
            for key in "raw_location raw_scale".split():
                for stat in "mean var".split():
                    channels.append(
                        getattr(outputs[which_set][key], stat)(axis=1)
                        .copy(name="%s.%s" % (key, stat)))
        if which_set == "train":
            if "activations" in monitor_options:
                from blocks.roles import has_roles, OUTPUT
                cnn_outputs = OrderedDict()
                for var in theano.gof.graph.ancestors(
                        graphs[which_set].outputs):
                    if (has_roles(var, [OUTPUT]) and util.annotated_by_a(
                            util.get_convolution_classes(), var)):
                        cnn_outputs.setdefault(
                            util.get_path(var), []).append(var)
                for path, vars in cnn_outputs.items():
                    vars = util.dedup(vars, equal=util.equal_computations)
                    for i, var in enumerate(vars):
                        channels.append(var.mean().copy(
                            name="activation[%i].mean:%s" % (i, path)))
            if "batch_normalization" in monitor_options:
                errors = []
                for population_stat, update in updates[which_set]:
                    if population_stat.name.startswith("population"):
                        # dig the batch statistic out of the exponential
                        # moving average expression; this relies on the
                        # update having the exact form
                        # (1 - alpha) * population + alpha * batch
                        batch_stat = update.owner.inputs[1].owner.inputs[1]
                        errors.append(
                            ((population_stat - batch_stat) ** 2).mean())
                if errors:
                    channels.append(T.stack(errors).mean().copy(
                        name="population_statistic_mse"))
        logger.warning("constructing %s monitor" % which_set)
        extensions.append(DataStreamMonitoring(
            channels, prefix=which_set, after_epoch=True,
            data_stream=task.get_stream(which_set, monitor=True)))

    return extensions
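# For context, a hedged sketch of where the returned extensions end up,
# assuming the surrounding script follows the usual Blocks pattern;
# algorithm, task, model, graphs, outputs, updates, and hyperparameters
# are all constructed elsewhere in that script.
from blocks.main_loop import MainLoop

extensions = construct_monitors(
    algorithm=algorithm, task=task, model=model, graphs=graphs,
    outputs=outputs, updates=updates,
    monitor_options=["steps", "parameters"],
    n_spatial_dims=2, hyperparameters=hyperparameters)
main_loop = MainLoop(algorithm=algorithm,
                     data_stream=task.get_stream("train"),
                     model=model, extensions=extensions)
main_loop.run()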