Example #1
0
def tag_convnet_dropout(outputs, rng=None, **kwargs):
    """Register a "convnet_dropout" transform on the convolutional outputs.

    Every ancestor of ``outputs`` that carries the ``OUTPUT`` role and is
    annotated by one of ``util.get_convolution_classes()`` is collected and
    grouped by ``util.get_path``.  Within each group, duplicates (as judged
    by ``util.equal_computations``) are dropped; all remaining variables
    are handed to ``graph.add_transform`` together with a
    ``graph.DropoutTransform``.

    ``rng`` is forwarded to the transform.  Extra keyword arguments are
    accepted and ignored.
    """
    from blocks.roles import has_roles, OUTPUT

    # path -> matching variables, in the order the graph walk yields them
    by_path = OrderedDict()
    for candidate in theano.gof.graph.ancestors(outputs):
        if not has_roles(candidate, [OUTPUT]):
            continue
        if not util.annotated_by_a(util.get_convolution_classes(), candidate):
            continue
        by_path.setdefault(util.get_path(candidate), []).append(candidate)

    targets = []
    for group in by_path.values():
        # keep every distinct computation found under this path
        targets.extend(util.dedup(group, equal=util.equal_computations))

    graph.add_transform(
        targets,
        graph.DropoutTransform("convnet_dropout", rng=rng),
        reason="regularization")
Example #2
0
def tag_convnet_dropout(outputs, rng=None, **kwargs):
    """Register a "convnet_dropout" transform, one variable per conv path.

    Every ancestor of ``outputs`` that carries the ``OUTPUT`` role and is
    annotated by one of ``util.get_convolution_classes()`` is collected and
    grouped by ``util.get_path``.  Each group is deduplicated with
    ``util.equal_computations`` and then reduced to a single variable via
    ``util.the``; those variables are handed to ``graph.add_transform``
    together with a ``graph.DropoutTransform``.

    ``rng`` is forwarded to the transform.  Extra keyword arguments are
    accepted and ignored.
    """
    from blocks.roles import has_roles, OUTPUT

    # path -> matching variables, in the order the graph walk yields them
    by_path = OrderedDict()
    for candidate in theano.gof.graph.ancestors(outputs):
        if not has_roles(candidate, [OUTPUT]):
            continue
        if not util.annotated_by_a(util.get_convolution_classes(), candidate):
            continue
        by_path.setdefault(util.get_path(candidate), []).append(candidate)

    targets = []
    for group in by_path.values():
        distinct = util.dedup(group, equal=util.equal_computations)
        # presumably util.the insists on exactly one element -- confirm
        targets.append(util.the(distinct))

    dropout = graph.DropoutTransform("convnet_dropout", rng=rng)
    graph.add_transform(targets, dropout, reason="regularization")
Example #3
0
def construct_monitors(algorithm, task, model, graphs, outputs, updates,
                       monitor_options, n_spatial_dims, plot_url,
                       hyperparameters, patchmonitor_interval, **kwargs):
    """Build the monitoring extensions selected by ``monitor_options``.

    Entries of ``monitor_options`` that are recognized (tested with ``in``):

    * ``"steps"`` -- a ``TrainingDataMonitoring`` (``after_batch=True``,
      prefix ``"iteration"``) with per-parameter step norms, the total
      step/gradient norms, ``Compressor`` step-rule statistics, the
      training costs and the means of variables tagged with
      ``batch_normalization_brick``.
    * ``"parameters"`` -- a stream-less ``DataStreamMonitoring`` with the
      norms of parameters named ``gamma``, ``beta``, ``W`` or ``b`` and the
      ``location_std`` / ``scale_std`` hyperparameters.
    * ``"theta"`` -- ``mean``/``var`` over axis 1 of ``true_scale``,
      ``raw_location`` and ``raw_scale`` for every data set.
    * ``"activations"`` -- mean of each convolutional output (train only).
    * ``"batch_normalization"`` -- mean squared difference between each
      ``population*`` statistic and the batch statistic in its update.
    * ``"patches"`` -- a patch monitor for the train and valid sets, when
      ``n_spatial_dims`` is 2 or 3.

    A ``DataStreamMonitoring`` is always created for each of the train,
    valid and test sets.  A ``Plot`` extension is appended when
    ``plot_url`` is truthy.

    Returns the list of extensions in construction order.  Extra keyword
    arguments are accepted and ignored.
    """
    from blocks.extensions.monitoring import TrainingDataMonitoring, DataStreamMonitoring

    extensions = []

    if "steps" in monitor_options:
        step_channels = []
        # NOTE(review): keep this a list comprehension over ``name`` -- the
        # Plot call at the bottom reads ``name`` (see the note there).
        step_channels.extend([
            algorithm.steps[param].norm(2).copy(name="step_norm:%s" % name)
            for name, param in model.get_parameter_dict().items()
        ])
        step_channels.append(
            algorithm.total_step_norm.copy(name="total_step_norm"))
        step_channels.append(
            algorithm.total_gradient_norm.copy(name="total_gradient_norm"))

        from extensions import Compressor
        for step_rule in algorithm.step_rule.components:
            if isinstance(step_rule, Compressor):
                for attribute in "norm newnorm median ratio".split():
                    step_channels.append(
                        getattr(step_rule, attribute).copy(
                            name="compressor.%s" % attribute))

        step_channels.extend(
            outputs["train"][key] for key in
            "cost emitter_cost excursion_cost cross_entropy error_rate".split(
            ))

        # Means of all batch-normalization-tagged ancestors of the training
        # cost, deduplicated by computation and then passed through
        # util.uniqueify_names_last_resort.
        step_channels.extend(
            util.uniqueify_names_last_resort(
                util.dedup(
                    (var.mean().copy(name="bn_stat:%s" % util.get_path(var))
                     for var in graph.deep_ancestors(
                         [outputs["train"]["cost"]])
                     if hasattr(var.tag, "batch_normalization_brick")),
                    equal=util.equal_computations)))

        logger.warning("constructing training data monitor")
        extensions.append(
            TrainingDataMonitoring(step_channels,
                                   prefix="iteration",
                                   after_batch=True))

    if "parameters" in monitor_options:
        data_independent_channels = []
        for parameter in graphs["train"].parameters:
            if parameter.name in "gamma beta W b".split():
                quantity = parameter.norm(2)
                quantity.name = "parameter.norm:%s" % util.get_path(parameter)
                data_independent_channels.append(quantity)
        for key in "location_std scale_std".split():
            data_independent_channels.append(
                hyperparameters[key].copy(name="parameter:%s" % key))
        # These channels do not depend on data, hence data_stream=None.
        extensions.append(
            DataStreamMonitoring(data_independent_channels,
                                 data_stream=None,
                                 after_epoch=True))

    for which_set in "train valid test".split():
        channels = []
        channels.extend(outputs[which_set][key]
                        for key in "cost emitter_cost excursion_cost".split())
        channels.extend(outputs[which_set][key]
                        for key in task.monitor_outputs())
        channels.append(
            outputs[which_set]["savings"].mean().copy(name="mean_savings"))

        if "theta" in monitor_options:
            for key in "true_scale raw_location raw_scale".split():
                for stat in "mean var".split():
                    channels.append(
                        getattr(outputs[which_set][key],
                                stat)(axis=1).copy(name="%s.%s" % (key, stat)))

        if which_set == "train" and "activations" in monitor_options:
            from blocks.roles import has_roles, OUTPUT
            # path -> convolutional output variables found under it
            cnn_outputs = OrderedDict()
            for var in theano.gof.graph.ancestors(graphs[which_set].outputs):
                if (has_roles(var, [OUTPUT]) and util.annotated_by_a(
                        util.get_convolution_classes(), var)):
                    cnn_outputs.setdefault(util.get_path(var), []).append(var)
            for path, group in cnn_outputs.items():
                group = util.dedup(group, equal=util.equal_computations)
                for i, var in enumerate(group):
                    channels.append(var.mean().copy(
                        name="activation[%i].mean:%s" % (i, path)))

        if "batch_normalization" in monitor_options:
            errors = []
            for population_stat, update in updates[which_set]:
                if population_stat.name.startswith("population"):
                    # Fragile: the batch statistic is dug out of the
                    # moving-average update expression purely by its
                    # position in the graph.
                    batch_stat = update.owner.inputs[1].owner.inputs[1]
                    errors.append(((population_stat - batch_stat)**2).mean())
            if errors:
                channels.append(
                    T.stack(errors).mean().copy(
                        name="population_statistic_mse"))

        logger.warning("constructing %s monitor" % which_set)
        extensions.append(
            DataStreamMonitoring(channels,
                                 prefix=which_set,
                                 after_epoch=True,
                                 data_stream=task.get_stream(which_set,
                                                             monitor=True)))

    if "patches" in monitor_options:
        from patchmonitor import PatchMonitoring, VideoPatchMonitoring

        # Fix: ``patchmonitor_klass`` used to stay unbound when
        # n_spatial_dims was neither 2 nor 3, so the guard below raised
        # UnboundLocalError instead of skipping patch monitoring.
        if n_spatial_dims == 2:
            patchmonitor_klass = PatchMonitoring
        elif n_spatial_dims == 3:
            patchmonitor_klass = VideoPatchMonitoring
        else:
            patchmonitor_klass = None

        if patchmonitor_klass is not None:
            for which in "train valid".split():
                patch = outputs[which]["patch"]
                # swap the first two axes, leave the remaining ones in place
                patch = patch.dimshuffle(1, 0, *range(2, patch.ndim))
                patch_extractor = theano.function(
                    [outputs[which][key] for key in "x x_shape".split()], [
                        outputs[which][key]
                        for key in "raw_location raw_scale".split()
                    ] + [patch])

                patchmonitor = patchmonitor_klass(
                    save_to="%s_patches_%s" % (hyperparameters["name"], which),
                    data_stream=task.get_stream(which,
                                                shuffle=False,
                                                num_examples=10),
                    every_n_batches=patchmonitor_interval,
                    extractor=patch_extractor,
                    map_to_input_space=attention.static_map_to_input_space)
                # writes one image immediately, at construction time
                patchmonitor.save_patches("patchmonitor_test.png")
                extensions.append(patchmonitor)

    if plot_url:
        plot_channels = []
        plot_channels.extend(task.plot_channels())
        plot_channels.append(["train_cost"])

        from blocks.extras.extensions.plot import Plot
        # NOTE(review): ``name`` is not a parameter of this function; the
        # only binding in sight is the comprehension variable in the
        # "steps" branch (which leaks on Python 2 only).  Confirm where the
        # plot name should come from -- hyperparameters["name"]?
        extensions.append(
            Plot(name,
                 channels=plot_channels,
                 after_epoch=True,
                 server_url=plot_url))

    return extensions
Example #4
0
def construct_monitors(algorithm, task, model, graphs, outputs,
                       updates, monitor_options, n_spatial_dims,
                       plot_url, hyperparameters,
                       patchmonitor_interval, **kwargs):
    """Assemble the list of monitoring extensions for a training run.

    ``monitor_options`` is consulted with ``in`` for the following keys:
    ``"steps"`` (per-batch step/gradient norms, ``Compressor`` statistics,
    training costs and batch-normalization means), ``"parameters"``
    (norms of parameters named gamma/beta/W/b and the ``location_std`` /
    ``scale_std`` hyperparameters), ``"theta"`` (mean/var over axis 1 of
    ``true_scale``, ``raw_location``, ``raw_scale``), ``"activations"``
    (mean convolutional activations, train set only),
    ``"batch_normalization"`` (population-vs-batch statistic MSE) and
    ``"patches"`` (patch monitors for train and valid when
    ``n_spatial_dims`` is 2 or 3).

    Independently of the options, one ``DataStreamMonitoring`` is created
    for each of the train, valid and test sets, and a ``Plot`` extension is
    appended when ``plot_url`` is truthy.

    Returns the extensions in construction order.  Extra keyword arguments
    are accepted and ignored.
    """
    from blocks.extensions.monitoring import TrainingDataMonitoring, DataStreamMonitoring

    extensions = []

    if "steps" in monitor_options:
        step_channels = []
        # NOTE(review): keep this a list comprehension over ``name`` -- the
        # Plot call at the bottom reads ``name`` (see the note there).
        step_channels.extend([
            algorithm.steps[param].norm(2).copy(name="step_norm:%s" % name)
            for name, param in model.get_parameter_dict().items()])
        step_channels.append(algorithm.total_step_norm.copy(name="total_step_norm"))
        step_channels.append(algorithm.total_gradient_norm.copy(name="total_gradient_norm"))

        from extensions import Compressor
        for step_rule in algorithm.step_rule.components:
            if isinstance(step_rule, Compressor):
                for attribute in "norm newnorm median ratio".split():
                    step_channels.append(getattr(step_rule, attribute).copy(
                        name="compressor.%s" % attribute))

        step_channels.extend(outputs["train"][key] for key in
                             "cost emitter_cost excursion_cost cross_entropy error_rate".split())

        # Means of all batch-normalization-tagged ancestors of the training
        # cost, deduplicated by computation and then passed through
        # util.uniqueify_names_last_resort.
        step_channels.extend(util.uniqueify_names_last_resort(util.dedup(
            (var.mean().copy(name="bn_stat:%s" % util.get_path(var))
             for var in graph.deep_ancestors([outputs["train"]["cost"]])
             if hasattr(var.tag, "batch_normalization_brick")),
            equal=util.equal_computations)))

        logger.warning("constructing training data monitor")
        extensions.append(TrainingDataMonitoring(
            step_channels, prefix="iteration", after_batch=True))

    if "parameters" in monitor_options:
        data_independent_channels = []
        for parameter in graphs["train"].parameters:
            if parameter.name in "gamma beta W b".split():
                quantity = parameter.norm(2)
                quantity.name = "parameter.norm:%s" % util.get_path(parameter)
                data_independent_channels.append(quantity)
        for key in "location_std scale_std".split():
            data_independent_channels.append(hyperparameters[key].copy(name="parameter:%s" % key))
        # These channels do not depend on data, hence data_stream=None.
        extensions.append(DataStreamMonitoring(
            data_independent_channels, data_stream=None, after_epoch=True))

    for which_set in "train valid test".split():
        channels = []
        channels.extend(outputs[which_set][key] for key in
                        "cost emitter_cost excursion_cost".split())
        channels.extend(outputs[which_set][key] for key in
                        task.monitor_outputs())
        channels.append(outputs[which_set]["savings"]
                        .mean().copy(name="mean_savings"))

        if "theta" in monitor_options:
            for key in "true_scale raw_location raw_scale".split():
                for stat in "mean var".split():
                    channels.append(getattr(outputs[which_set][key], stat)(axis=1)
                                    .copy(name="%s.%s" % (key, stat)))

        if which_set == "train" and "activations" in monitor_options:
            from blocks.roles import has_roles, OUTPUT
            # path -> convolutional output variables found under it
            cnn_outputs = OrderedDict()
            for var in theano.gof.graph.ancestors(graphs[which_set].outputs):
                if (has_roles(var, [OUTPUT]) and util.annotated_by_a(
                        util.get_convolution_classes(), var)):
                    cnn_outputs.setdefault(util.get_path(var), []).append(var)
            for path, group in cnn_outputs.items():
                group = util.dedup(group, equal=util.equal_computations)
                for i, var in enumerate(group):
                    channels.append(var.mean().copy(
                        name="activation[%i].mean:%s" % (i, path)))

        if "batch_normalization" in monitor_options:
            errors = []
            for population_stat, update in updates[which_set]:
                if population_stat.name.startswith("population"):
                    # Fragile: the batch statistic is dug out of the
                    # moving-average update expression purely by its
                    # position in the graph.
                    batch_stat = update.owner.inputs[1].owner.inputs[1]
                    errors.append(((population_stat - batch_stat)**2).mean())
            if errors:
                channels.append(T.stack(errors).mean().copy(name="population_statistic_mse"))

        logger.warning("constructing %s monitor" % which_set)
        extensions.append(DataStreamMonitoring(
            channels, prefix=which_set, after_epoch=True,
            data_stream=task.get_stream(which_set, monitor=True)))

    if "patches" in monitor_options:
        from patchmonitor import PatchMonitoring, VideoPatchMonitoring

        # Fix: ``patchmonitor_klass`` used to stay unbound when
        # n_spatial_dims was neither 2 nor 3, so the guard below raised
        # UnboundLocalError instead of skipping patch monitoring.
        patchmonitor_klass = None
        if n_spatial_dims == 2:
            patchmonitor_klass = PatchMonitoring
        elif n_spatial_dims == 3:
            patchmonitor_klass = VideoPatchMonitoring

        if patchmonitor_klass is not None:
            for which in "train valid".split():
                patch = outputs[which]["patch"]
                # swap the first two axes, leave the remaining ones in place
                patch = patch.dimshuffle(1, 0, *range(2, patch.ndim))
                patch_extractor = theano.function(
                    [outputs[which][key] for key in "x x_shape".split()],
                    [outputs[which][key] for key in "raw_location raw_scale".split()] + [patch])

                patchmonitor = patchmonitor_klass(
                    save_to="%s_patches_%s" % (hyperparameters["name"], which),
                    data_stream=task.get_stream(which, shuffle=False, num_examples=10),
                    every_n_batches=patchmonitor_interval,
                    extractor=patch_extractor,
                    map_to_input_space=attention.static_map_to_input_space)
                # writes one image immediately, at construction time
                patchmonitor.save_patches("patchmonitor_test.png")
                extensions.append(patchmonitor)

    if plot_url:
        plot_channels = []
        plot_channels.extend(task.plot_channels())
        plot_channels.append(["train_cost"])

        from blocks.extras.extensions.plot import Plot
        # NOTE(review): ``name`` is not a parameter of this function; the
        # only binding in sight is the comprehension variable in the
        # "steps" branch (which leaks on Python 2 only).  Confirm where the
        # plot name should come from -- hyperparameters["name"]?
        extensions.append(Plot(name, channels=plot_channels,
                               after_epoch=True, server_url=plot_url))

    return extensions
Example #5
0
def construct_monitors(algorithm, task, model, graphs, outputs, plot_url,
                       hyperparameters, **kwargs):
    """Build the fixed set of monitoring extensions for a training run.

    Always creates, in this order:

    1. a ``TrainingDataMonitoring`` (prefix ``"train"``, after each epoch)
       with the step norm of every model parameter;
    2. a stream-less ``DataStreamMonitoring`` with the norms of parameters
       named ``gamma``, ``beta``, ``W`` or ``b``;
    3. one ``DataStreamMonitoring`` per data set (train, valid, test) with
       the cost and ``task.monitor_outputs()``; the train set additionally
       gets mean convolutional activations and the total gradient norm.

    A ``Plot`` extension is appended when ``plot_url`` is truthy.

    Returns the list of extensions.  ``hyperparameters`` and any extra
    keyword arguments are accepted but not used here.
    """
    from blocks.extensions.monitoring import TrainingDataMonitoring, DataStreamMonitoring
    from patchmonitor import PatchMonitoring, VideoPatchMonitoring

    monitors = []

    # NOTE(review): the comprehension variables are left exactly as they
    # were because ``name`` is read again by the Plot call at the bottom
    # (a list-comprehension variable leaks into the function on Python 2).
    step_norms = [
        algorithm.steps[param].norm(2).copy(name="step_norm:%s" % name)
        for name, param in model.get_parameter_dict().items()
    ]
    monitors.append(
        TrainingDataMonitoring(step_norms, prefix="train", after_epoch=True))

    # Parameter norms need no data, so they are monitored without a stream.
    static_channels = []
    for parameter in graphs["train"].parameters:
        if parameter.name not in "gamma beta W b".split():
            continue
        norm = parameter.norm(2)
        norm.name = "parameter.norm:%s" % util.get_path(parameter)
        static_channels.append(norm)
    monitors.append(
        DataStreamMonitoring(static_channels,
                             data_stream=None,
                             after_epoch=True))

    for which_set in "train valid test".split():
        channels = []
        for key in "cost".split():
            channels.append(outputs[which_set][key])
        for key in task.monitor_outputs():
            channels.append(outputs[which_set][key])

        if which_set == "train":
            from blocks.roles import has_roles, OUTPUT
            # path -> convolutional output variables found under it
            by_path = OrderedDict()
            for candidate in theano.gof.graph.ancestors(
                    graphs[which_set].outputs):
                if not has_roles(candidate, [OUTPUT]):
                    continue
                if not util.annotated_by_a(util.get_convolution_classes(),
                                           candidate):
                    continue
                by_path.setdefault(util.get_path(candidate),
                                   []).append(candidate)
            for path, group in by_path.items():
                distinct = util.dedup(group, equal=util.equal_computations)
                for index, activation in enumerate(distinct):
                    channel_name = "activation[%i].mean:%s" % (index, path)
                    channels.append(activation.mean().copy(name=channel_name))

            channels.append(
                algorithm.total_gradient_norm.copy(name="total_gradient_norm"))

        stream = task.get_stream(which_set, monitor=True)
        monitors.append(
            DataStreamMonitoring(channels,
                                 prefix=which_set,
                                 after_epoch=True,
                                 data_stream=stream))

    if plot_url:
        plot_channels = list(task.plot_channels())
        plot_channels.append(["train_cost"])

        from blocks.extras.extensions.plot import Plot
        # NOTE(review): ``name`` is not a parameter of this function --
        # confirm where the plot name is meant to come from.
        plot = Plot(name,
                    channels=plot_channels,
                    after_epoch=True,
                    server_url=plot_url)
        monitors.append(plot)

    return monitors
Example #6
0
def construct_monitors(algorithm, task, model, graphs, outputs,
                       updates, monitor_options, n_spatial_dims,
                       hyperparameters, **kwargs):
    """Build the monitoring extensions selected by ``monitor_options``.

    Options tested with ``in``: ``"steps"`` (per-parameter step norms plus
    total step/gradient norms, reported after each epoch with prefix
    ``"train"``), ``"parameters"`` (norms of parameters named
    gamma/beta/W/b and the ``location_std`` / ``scale_std``
    hyperparameters), ``"theta"`` (mean/var over axis 1 of
    ``raw_location`` and ``raw_scale``), ``"activations"`` (mean
    convolutional activations, train set only) and
    ``"batch_normalization"`` (population-vs-batch statistic MSE).

    One ``DataStreamMonitoring`` is always created for each of the train
    and test sets.

    Returns the list of extensions.  ``n_spatial_dims`` and any extra
    keyword arguments are accepted but not used here.
    """
    from blocks.extensions.monitoring import TrainingDataMonitoring, DataStreamMonitoring

    monitors = []

    if "steps" in monitor_options:
        step_channels = []
        for param_name, param in model.get_parameter_dict().items():
            step_norm = algorithm.steps[param].norm(2)
            step_channels.append(
                step_norm.copy(name="step_norm:%s" % param_name))
        step_channels.append(
            algorithm.total_step_norm.copy(name="total_step_norm"))
        step_channels.append(
            algorithm.total_gradient_norm.copy(name="total_gradient_norm"))
        logger.warning("constructing training data monitor")
        monitors.append(
            TrainingDataMonitoring(step_channels,
                                   prefix="train",
                                   after_epoch=True))

    if "parameters" in monitor_options:
        # None of these channels depend on data, hence data_stream=None.
        static_channels = []
        for parameter in graphs["train"].parameters:
            if parameter.name not in "gamma beta W b".split():
                continue
            norm = parameter.norm(2)
            norm.name = "parameter.norm:%s" % util.get_path(parameter)
            static_channels.append(norm)
        static_channels.extend(
            hyperparameters[key].copy(name="parameter:%s" % key)
            for key in "location_std scale_std".split())
        monitors.append(
            DataStreamMonitoring(static_channels,
                                 data_stream=None,
                                 after_epoch=True))

    for which_set in "train test".split():
        channels = []
        for key in "cost emitter_cost excursion_cost".split():
            channels.append(outputs[which_set][key])
        for key in task.monitor_outputs():
            channels.append(outputs[which_set][key])
        mean_savings = outputs[which_set]["savings"].mean()
        channels.append(mean_savings.copy(name="mean_savings"))

        if "theta" in monitor_options:
            for key in "raw_location raw_scale".split():
                for stat in "mean var".split():
                    reduce_fn = getattr(outputs[which_set][key], stat)
                    channels.append(
                        reduce_fn(axis=1).copy(name="%s.%s" % (key, stat)))

        if which_set == "train" and "activations" in monitor_options:
            from blocks.roles import has_roles, OUTPUT
            # path -> convolutional output variables found under it
            by_path = OrderedDict()
            for candidate in theano.gof.graph.ancestors(
                    graphs[which_set].outputs):
                if not has_roles(candidate, [OUTPUT]):
                    continue
                if not util.annotated_by_a(util.get_convolution_classes(),
                                           candidate):
                    continue
                by_path.setdefault(util.get_path(candidate),
                                   []).append(candidate)
            for path, group in by_path.items():
                distinct = util.dedup(group, equal=util.equal_computations)
                for index, activation in enumerate(distinct):
                    channel_name = "activation[%i].mean:%s" % (index, path)
                    channels.append(activation.mean().copy(name=channel_name))

        if "batch_normalization" in monitor_options:
            squared_errors = []
            for population_stat, update in updates[which_set]:
                if not population_stat.name.startswith("population"):
                    continue
                # Fragile: the batch statistic is dug out of the
                # moving-average update expression purely by its position
                # in the graph.
                batch_stat = update.owner.inputs[1].owner.inputs[1]
                difference = population_stat - batch_stat
                squared_errors.append((difference**2).mean())
            if squared_errors:
                mse = T.stack(squared_errors).mean()
                channels.append(mse.copy(name="population_statistic_mse"))

        logger.warning("constructing %s monitor" % which_set)
        stream = task.get_stream(which_set, monitor=True)
        monitors.append(
            DataStreamMonitoring(channels,
                                 prefix=which_set,
                                 after_epoch=True,
                                 data_stream=stream))

    return monitors