Esempio n. 1
0
 def cost(self, x, y, n_patches):
     """Build the training cost and hand monitoring quantities to the brick.

     ``self.mlp`` is applied to ``x``; the result is scored against the
     flattened ``y`` with ``self.softmax.categorical_cross_entropy`` and
     averaged over axis 0.  That mean, and the axis-0 mean of
     ``T.neq(y, argmax)``, are passed to ``self.add_auxiliary_variable``
     under the names "cross_entropy" and "error_rate".

     ``n_patches`` is accepted but not used by this implementation.

     Returns the averaged cross-entropy as returned by
     ``util.named(..., "cost")``.
     """
     logits = self.mlp.apply(x)

     flat_targets = y.flatten()
     per_example_xent = self.softmax.categorical_cross_entropy(
         flat_targets, logits)
     mean_xent = per_example_xent.mean(axis=0)

     # NOTE(review): the comparison uses ``y`` as passed in, not the
     # flattened labels used for the cross-entropy -- confirm intended.
     predicted = logits.argmax(axis=1)
     mean_error = T.neq(y, predicted).mean(axis=0)

     named_cost = util.named(mean_xent, "cost")
     self.add_auxiliary_variable(mean_xent, name="cross_entropy")
     self.add_auxiliary_variable(mean_error, name="error_rate")
     return named_cost
Esempio n. 2
0
 def cost(self, x, y, n_patches):
     """Build the training cost and hand monitoring quantities to the brick.

     ``self.mlp`` is applied to ``x``; the result is scored against the
     flattened ``y`` with ``self.softmax.categorical_cross_entropy`` and
     averaged over axis 0.  That mean, and the axis-0 mean of
     ``T.neq(y, argmax)``, are passed to ``self.add_auxiliary_variable``
     under the names "cross_entropy" and "error_rate".

     ``n_patches`` is accepted but not used by this implementation.

     Returns the averaged cross-entropy as returned by
     ``util.named(..., "cost")``.
     """
     logits = self.mlp.apply(x)

     flat_targets = y.flatten()
     per_example_xent = self.softmax.categorical_cross_entropy(
         flat_targets, logits)
     mean_xent = per_example_xent.mean(axis=0)

     # NOTE(review): the comparison uses ``y`` as passed in, not the
     # flattened labels used for the cross-entropy -- confirm intended.
     predicted = logits.argmax(axis=1)
     mean_error = T.neq(y, predicted).mean(axis=0)

     named_cost = util.named(mean_xent, "cost")
     self.add_auxiliary_variable(mean_xent, name="cross_entropy")
     self.add_auxiliary_variable(mean_error, name="error_rate")
     return named_cost
Esempio n. 3
0
 def cost(self, cs, y, n_patches):
     """Build per-patch predictions; train on and monitor the last one.

     For every ``t`` in ``xrange(n_patches)`` the slice ``cs[:, t, :]`` is
     fed through ``self.mlp``.  Each output is scored against the
     flattened ``y`` with ``self.softmax.categorical_cross_entropy`` and
     compared to ``y`` via ``T.neq`` on its ``argmax(axis=1)``.

     Only the quantities of the final step (index -1) are used: they are
     passed to ``self.add_auxiliary_variable`` as "cross_entropy" and
     "error_rate", and the final cross-entropy is returned through
     ``util.named(..., "cost")``.
     """
     # Pass 1: one prediction per patch index.
     step_outputs = []
     for step in xrange(n_patches):
         step_outputs.append(self.mlp.apply(cs[:, step, :]))

     # Pass 2: cross-entropy of each prediction against flattened labels.
     step_xents = []
     for output in step_outputs:
         step_xents.append(
             self.softmax.categorical_cross_entropy(y.flatten(), output))

     # Pass 3: axis-0 mean of the label/argmax mismatch per prediction.
     # NOTE(review): ``y`` is compared unflattened here -- confirm intended.
     step_errors = []
     for output in step_outputs:
         step_errors.append(T.neq(y, output.argmax(axis=1)).mean(axis=0))

     final_xent = step_xents[-1]
     final_error = step_errors[-1]

     # The training signal comes from the final prediction only ...
     named_cost = util.named(final_xent, "cost")
     # ... and the same final prediction is what gets monitored.
     self.add_auxiliary_variable(final_xent, name="cross_entropy")
     self.add_auxiliary_variable(final_error, name="error_rate")
     return named_cost
Esempio n. 4
0
 def cost(self, cs, y, n_patches):
     """Build per-patch predictions; train on and monitor the last one.

     For every ``t`` in ``xrange(n_patches)`` the slice ``cs[:, t, :]`` is
     fed through ``self.mlp``.  Each output is scored against the
     flattened ``y`` with ``self.softmax.categorical_cross_entropy`` and
     compared to ``y`` via ``T.neq`` on its ``argmax(axis=1)``.

     Only the quantities of the final step (index -1) are used: they are
     passed to ``self.add_auxiliary_variable`` as "cross_entropy" and
     "error_rate", and the final cross-entropy is returned through
     ``util.named(..., "cost")``.
     """
     # Pass 1: one prediction per patch index.
     step_outputs = []
     for step in xrange(n_patches):
         step_outputs.append(self.mlp.apply(cs[:, step, :]))

     # Pass 2: cross-entropy of each prediction against flattened labels.
     step_xents = []
     for output in step_outputs:
         step_xents.append(
             self.softmax.categorical_cross_entropy(y.flatten(), output))

     # Pass 3: axis-0 mean of the label/argmax mismatch per prediction.
     # NOTE(review): ``y`` is compared unflattened here -- confirm intended.
     step_errors = []
     for output in step_outputs:
         step_errors.append(T.neq(y, output.argmax(axis=1)).mean(axis=0))

     final_xent = step_xents[-1]
     final_error = step_errors[-1]

     # The training signal comes from the final prediction only ...
     named_cost = util.named(final_xent, "cost")
     # ... and the same final prediction is what gets monitored.
     self.add_auxiliary_variable(final_xent, name="cross_entropy")
     self.add_auxiliary_variable(final_error, name="error_rate")
     return named_cost
Esempio n. 5
0
def construct_monitors(algorithm, task, n_patches, x, x_shape,
                       graph, name, ram, model, cost,
                       n_spatial_dims, plot_url, patchmonitor_interval=100, **kwargs):
    """Assemble the list of monitoring extensions for a training run.

    Parameters
    ----------
    algorithm : provides ``total_gradient_norm`` and a ``steps`` mapping
        indexed by parameter.
    task : provides ``monitor_channels(graph)``, ``get_stream(...)`` and
        ``plot_channels()``.
    n_patches : number of patch steps; passed to
        ``util.get_recurrent_auxiliaries`` and used to index axis 1 of
        ``location`` and ``scale``.
    x, x_shape : inputs of the compiled patch extractor function.
    graph : source of the recurrent auxiliaries, the task channels and
        the ``parameters`` scanned for ones named "gamma" or "beta".
    name : prefix of the patch output names and title of the plot.
    ram : provides ``crop(x, x_shape, location, scale)``.
    model : provides ``get_parameter_dict()``.
    cost : appended to the channels monitored on every data stream.
    n_spatial_dims : 2 selects ``PatchMonitoring``, 3 selects
        ``VideoPatchMonitoring``; any other value disables patch
        monitoring.
    plot_url : when truthy, a ``Plot`` extension pointing at this server
        URL is added.
    patchmonitor_interval : forwarded as ``every_n_batches``.
    **kwargs : accepted and ignored.

    Returns
    -------
    list
        The constructed extensions, in the order they were created.
    """
    location, scale, savings = util.get_recurrent_auxiliaries(
        "location scale savings".split(), graph, n_patches)

    channels = util.Channels()
    channels.extend(task.monitor_channels(graph))

    channels.append(util.named(savings.mean(), "savings.mean"))

    # Explicit (name, variable) pairs instead of a lookup through locals().
    for variable_name, variable in (("location", location),
                                    ("scale", scale)):
        channels.append(variable.mean(axis=0),
                        "%s.mean" % variable_name)
        channels.append(variable.var(axis=0),
                        "%s.variance" % variable_name)

    channels.append(algorithm.total_gradient_norm,
                    "total_gradient_norm")

    # One step-norm channel per model parameter.  The loop variable is
    # deliberately not called ``name`` so it cannot be confused with the
    # ``name`` argument of this function.
    step_norms = util.Channels()
    step_norms.extend(util.named(l2_norm([algorithm.steps[param]]),
                                 "%s.step_norm" % parameter_name)
                      for parameter_name, param
                      in model.get_parameter_dict().items())
    step_channels = step_norms.get_channels()

    # Disabled: mean of every OUTPUT-role activation.
    #for activation in VariableFilter(roles=[OUTPUT])(graph.variables):
    #    quantity = activation.mean()
    #    quantity.name = "%s.mean" % util.get_path(activation)
    #    channels.append(quantity)

    # Parameter means are monitored separately, without a data stream.
    data_independent_channels = util.Channels()
    for parameter in graph.parameters:
        if parameter.name in "gamma beta".split():
            quantity = parameter.mean()
            quantity.name = "%s.mean" % util.get_path(parameter)
            data_independent_channels.append(quantity)

    extensions = []

    extensions.append(TrainingDataMonitoring(
        step_channels, prefix="train", after_epoch=True))

    extensions.append(DataStreamMonitoring(data_independent_channels.get_channels(),
                                           data_stream=None, after_epoch=True))
    extensions.extend(DataStreamMonitoring((channels.get_channels() + [cost]),
                                           data_stream=task.get_stream(which, monitor=True),
                                           prefix=which, after_epoch=True)
                      for which in "train valid test".split())

    # Always bind patchmonitor_klass: previously it stayed undefined for
    # any n_spatial_dims other than 2 or 3 and the check below raised
    # UnboundLocalError.
    if n_spatial_dims == 2:
        patchmonitor_klass = PatchMonitoring
    elif n_spatial_dims == 3:
        patchmonitor_klass = VideoPatchMonitoring
    else:
        patchmonitor_klass = None

    if patchmonitor_klass:
        patch = T.stack(*[
            ram.crop(x, x_shape, location[:, i, :], scale[:, i, :])
            for i in xrange(n_patches)])
        # Swap the first two axes of the stacked result.
        patch = patch.dimshuffle(1, 0, *range(2, patch.ndim))
        patch_extractor = theano.function([x, x_shape],
                                          [location, scale, patch])

        for which in "train valid".split():
            patchmonitor = patchmonitor_klass(
                save_to="%s_patches_%s" % (name, which),
                data_stream=task.get_stream(which, shuffle=False, num_examples=5),
                every_n_batches=patchmonitor_interval,
                extractor=patch_extractor,
                map_to_input_space=attention.static_map_to_input_space)
            # Called once per monitor at construction time; both
            # iterations use the same file name.
            patchmonitor.save_patches("patchmonitor_test.png")
            extensions.append(patchmonitor)

    if plot_url:
        plot_channels = []
        plot_channels.extend(task.plot_channels())
        plot_channels.append(["train_cost"])
        #plot_channels.append(["train_%s" % step_channel.name for step_channel in step_channels])

        # Imported only when plotting is actually requested.
        from blocks.extras.extensions.plot import Plot
        extensions.append(Plot(name, channels=plot_channels,
                            after_epoch=True, server_url=plot_url))

    return extensions
Esempio n. 6
0
def construct_monitors(
    algorithm,
    task,
    n_patches,
    x,
    x_uncentered,
    hs,
    graph,
    plot_url,
    name,
    ram,
    model,
    cost,
    n_spatial_dims,
    patchmonitor_interval=100,
    **kwargs
):
    """Assemble the list of monitoring extensions for a training run.

    Parameters
    ----------
    algorithm, x, model : accepted but not used by the active code (the
        step-norm monitoring that used ``algorithm`` and ``model`` is
        commented out).
    task : provides ``monitor_channels(graph)``, ``get_stream(...)`` and
        ``plot_channels()``.
    n_patches : number of patch steps; passed to
        ``util.get_recurrent_auxiliaries`` and used to index axis 1 of
        ``hs``, ``location`` and ``scale``.
    x_uncentered : input of the compiled patch extractor and of
        ``ram.attention.crop``.
    hs : indexed as ``hs[:, i]``; the mean of each slice is monitored.
    graph : source of the recurrent auxiliaries, the task channels and
        the variables filtered for the OUTPUT role.
    plot_url : forwarded to ``Plot`` as ``server_url``.
    name : first argument of the ``Plot`` extension.
    ram : provides ``attention.crop(x_uncentered, location, scale)``.
    cost : appended to the channels monitored on every data stream.
    n_spatial_dims : 2 selects ``PatchMonitoring``, 3 selects
        ``VideoPatchMonitoring``; any other value disables patch
        monitoring.
    patchmonitor_interval : forwarded as ``every_n_batches``.
    **kwargs : accepted and ignored.

    Returns
    -------
    list
        The constructed extensions, in the order they were created.
    """
    location, scale, savings = util.get_recurrent_auxiliaries("location scale savings".split(), graph, n_patches)

    channels = util.Channels()
    channels.extend(task.monitor_channels(graph))
    for i in xrange(n_patches):
        channels.append(hs[:, i].mean(), "h%i.mean" % i)

    channels.append(util.named(savings.mean(), "savings.mean"))

    # Explicit (name, variable) pairs instead of a lookup through locals().
    for variable_name, variable in (("location", location), ("scale", scale)):
        channels.append(variable.var(axis=0).mean(), "%s.batch_variance" % variable_name)
        channels.append(variable.var(axis=1).mean(), "%s.time_variance" % variable_name)

    # Disabled: per-parameter step norms.
    # step_norms = util.Channels()
    # step_norms.extend(util.named(l2_norm([algorithm.steps[param]]),
    #                             "%s.step_norm" % name)
    #                  for name, param in model.get_parameter_dict().items())
    # step_channels = step_norms.get_channels()

    # Monitor the mean of every variable carrying the OUTPUT role.
    for activation in VariableFilter(roles=[OUTPUT])(graph.variables):
        quantity = activation.mean()
        quantity.name = "%s.mean" % util.get_path(activation)
        channels.append(quantity)

    extensions = []

    # extensions.append(TrainingDataMonitoring(
    #    step_channels,
    #    prefix="train", after_epoch=True))

    extensions.extend(
        DataStreamMonitoring(
            (channels.get_channels() + [cost]), data_stream=task.get_stream(which), prefix=which, after_epoch=True
        )
        for which in "train valid test".split()
    )

    # Always bind patchmonitor_klass: previously it stayed undefined for
    # any n_spatial_dims other than 2 or 3 and the check below raised
    # UnboundLocalError.
    if n_spatial_dims == 2:
        patchmonitor_klass = PatchMonitoring
    elif n_spatial_dims == 3:
        patchmonitor_klass = VideoPatchMonitoring
    else:
        patchmonitor_klass = None

    if patchmonitor_klass:
        # get patches from original (uncentered) images
        patch = T.stack(
            *[ram.attention.crop(x_uncentered, location[:, i, :], scale[:, i, :]) for i in xrange(n_patches)]
        )
        # Swap the first two axes of the stacked result.
        patch = patch.dimshuffle(1, 0, *range(2, patch.ndim))

        patchmonitor = patchmonitor_klass(
            task.get_stream("valid", SequentialScheme(5, 5)),
            every_n_batches=patchmonitor_interval,
            extractor=theano.function([x_uncentered], [location, scale, patch]),
            map_to_input_space=masonry.static_map_to_input_space,
        )
        # Called once at construction time, before training starts.
        patchmonitor.save_patches("test.png")
        extensions.append(patchmonitor)

    plot_channels = []
    plot_channels.extend(task.plot_channels())
    plot_channels.append(["train_cost"])
    # plot_channels.append(["train_%s" % step_channel.name for step_channel in step_channels])

    # The plot extension is added unconditionally in this variant.
    extensions.append(Plot(name, channels=plot_channels, after_epoch=True, server_url=plot_url))

    return extensions
Esempio n. 7
0
def construct_monitors(algorithm,
                       task,
                       n_patches,
                       x,
                       x_uncentered,
                       hs,
                       graph,
                       plot_url,
                       name,
                       ram,
                       model,
                       cost,
                       n_spatial_dims,
                       patchmonitor_interval=100,
                       **kwargs):
    """Assemble the list of monitoring extensions for a training run.

    Parameters
    ----------
    algorithm, x, hs, model : accepted but not used by the active code
        (the monitoring that used them is commented out).
    task : provides ``monitor_channels(graph)``, ``get_stream(...)`` and
        ``plot_channels()``.
    n_patches : number of patch steps; passed to
        ``util.get_recurrent_auxiliaries`` and used to index axis 1 of
        ``location`` and ``scale``.
    x_uncentered : input of the compiled patch extractor and of
        ``ram.attention.crop``.
    graph : source of the recurrent auxiliaries, the task channels and
        the ``parameters`` scanned for ones named "gamma" or "beta".
    plot_url : forwarded to ``Plot`` as ``server_url``.
    name : first argument of the ``Plot`` extension.
    ram : provides ``attention.crop(x_uncentered, location, scale)``.
    cost : appended to the channels monitored on every data stream.
    n_spatial_dims : 2 selects ``PatchMonitoring``, 3 selects
        ``VideoPatchMonitoring``; any other value disables patch
        monitoring.
    patchmonitor_interval : forwarded as ``every_n_batches``.
    **kwargs : accepted and ignored.

    Returns
    -------
    list
        The constructed extensions, in the order they were created.
    """
    location, scale, savings = util.get_recurrent_auxiliaries(
        "location scale savings".split(), graph, n_patches)

    channels = util.Channels()
    channels.extend(task.monitor_channels(graph))

    # Disabled: mean of each hidden-state slice.
    #for i in xrange(n_patches):
    #    channels.append(hs[:, i].mean(), "h%i.mean" % i)

    channels.append(util.named(savings.mean(), "savings.mean"))

    # Explicit (name, variable) pairs instead of a lookup through locals().
    for variable_name, variable in (("location", location),
                                    ("scale", scale)):
        channels.append(
            variable.var(axis=0).mean(), "%s.batch_variance" % variable_name)
        channels.append(
            variable.var(axis=1).mean(), "%s.time_variance" % variable_name)

    # Disabled: per-parameter step norms.
    #step_norms = util.Channels()
    #step_norms.extend(util.named(l2_norm([algorithm.steps[param]]),
    #                             "%s.step_norm" % name)
    #                  for name, param in model.get_parameter_dict().items())
    #step_channels = step_norms.get_channels()

    # Disabled: mean of every OUTPUT-role activation.
    #for activation in VariableFilter(roles=[OUTPUT])(graph.variables):
    #    quantity = activation.mean()
    #    quantity.name = "%s.mean" % util.get_path(activation)
    #    channels.append(quantity)

    # Monitor the mean of every parameter named "gamma" or "beta".
    for parameter in graph.parameters:
        if parameter.name in "gamma beta".split():
            quantity = parameter.mean()
            quantity.name = "%s.mean" % util.get_path(parameter)
            channels.append(quantity)

    extensions = []

    #extensions.append(TrainingDataMonitoring(
    #    step_channels,
    #    prefix="train", after_epoch=True))

    extensions.extend(
        DataStreamMonitoring((channels.get_channels() + [cost]),
                             data_stream=task.get_stream(which),
                             prefix=which,
                             after_epoch=True)
        for which in "train valid test".split())

    # Always bind patchmonitor_klass: previously it stayed undefined for
    # any n_spatial_dims other than 2 or 3 and the check below raised
    # UnboundLocalError.
    if n_spatial_dims == 2:
        patchmonitor_klass = PatchMonitoring
    elif n_spatial_dims == 3:
        patchmonitor_klass = VideoPatchMonitoring
    else:
        patchmonitor_klass = None

    if patchmonitor_klass:
        # get patches from original (uncentered) images
        patch = T.stack(*[
            ram.attention.crop(x_uncentered, location[:, i, :], scale[:, i, :])
            for i in xrange(n_patches)
        ])
        # Swap the first two axes of the stacked result.
        patch = patch.dimshuffle(1, 0, *range(2, patch.ndim))

        patchmonitor = patchmonitor_klass(
            task.get_stream("valid", SequentialScheme(5, 5)),
            every_n_batches=patchmonitor_interval,
            extractor=theano.function([x_uncentered],
                                      [location, scale, patch]),
            map_to_input_space=masonry.static_map_to_input_space)
        # Called once at construction time, before training starts.
        patchmonitor.save_patches("test.png")
        extensions.append(patchmonitor)

    plot_channels = []
    plot_channels.extend(task.plot_channels())
    plot_channels.append(["train_cost"])
    #plot_channels.append(["train_%s" % step_channel.name for step_channel in step_channels])

    # The plot extension is added unconditionally in this variant.
    extensions.append(
        Plot(name,
             channels=plot_channels,
             after_epoch=True,
             server_url=plot_url))

    return extensions
Esempio n. 8
0
def construct_monitors(algorithm,
                       task,
                       n_patches,
                       x,
                       x_shape,
                       graph,
                       name,
                       ram,
                       model,
                       cost,
                       n_spatial_dims,
                       plot_url,
                       patchmonitor_interval=100,
                       **kwargs):
    """Assemble the list of monitoring extensions for a training run.

    Parameters
    ----------
    algorithm : provides ``total_gradient_norm`` and a ``steps`` mapping
        indexed by parameter.
    task : provides ``monitor_channels(graph)``, ``get_stream(...)`` and
        ``plot_channels()``.
    n_patches : number of patch steps; passed to
        ``util.get_recurrent_auxiliaries`` and used to index axis 1 of
        ``location`` and ``scale``.
    x, x_shape : inputs of the compiled patch extractor function.
    graph : source of the recurrent auxiliaries, the task channels and
        the ``parameters`` scanned for ones named "gamma" or "beta".
    name : prefix of the patch output names and title of the plot.
    ram : provides ``crop(x, x_shape, location, scale)``.
    model : provides ``get_parameter_dict()``.
    cost : appended to the channels monitored on every data stream.
    n_spatial_dims : 2 selects ``PatchMonitoring``, 3 selects
        ``VideoPatchMonitoring``; any other value disables patch
        monitoring.
    plot_url : when truthy, a ``Plot`` extension pointing at this server
        URL is added.
    patchmonitor_interval : forwarded as ``every_n_batches``.
    **kwargs : accepted and ignored.

    Returns
    -------
    list
        The constructed extensions, in the order they were created.
    """
    location, scale, savings = util.get_recurrent_auxiliaries(
        "location scale savings".split(), graph, n_patches)

    channels = util.Channels()
    channels.extend(task.monitor_channels(graph))

    channels.append(util.named(savings.mean(), "savings.mean"))

    # Explicit (name, variable) pairs instead of a lookup through locals().
    for variable_name, variable in (("location", location),
                                    ("scale", scale)):
        channels.append(variable.mean(axis=0), "%s.mean" % variable_name)
        channels.append(variable.var(axis=0), "%s.variance" % variable_name)

    channels.append(algorithm.total_gradient_norm, "total_gradient_norm")

    # One step-norm channel per model parameter.  The loop variable is
    # deliberately not called ``name`` so it cannot be confused with the
    # ``name`` argument of this function.
    step_norms = util.Channels()
    step_norms.extend(
        util.named(l2_norm([algorithm.steps[param]]),
                   "%s.step_norm" % parameter_name)
        for parameter_name, param in model.get_parameter_dict().items())
    step_channels = step_norms.get_channels()

    # Disabled: mean of every OUTPUT-role activation.
    #for activation in VariableFilter(roles=[OUTPUT])(graph.variables):
    #    quantity = activation.mean()
    #    quantity.name = "%s.mean" % util.get_path(activation)
    #    channels.append(quantity)

    # Parameter means are monitored separately, without a data stream.
    data_independent_channels = util.Channels()
    for parameter in graph.parameters:
        if parameter.name in "gamma beta".split():
            quantity = parameter.mean()
            quantity.name = "%s.mean" % util.get_path(parameter)
            data_independent_channels.append(quantity)

    extensions = []

    extensions.append(
        TrainingDataMonitoring(step_channels, prefix="train",
                               after_epoch=True))

    extensions.append(
        DataStreamMonitoring(data_independent_channels.get_channels(),
                             data_stream=None,
                             after_epoch=True))
    extensions.extend(
        DataStreamMonitoring((channels.get_channels() + [cost]),
                             data_stream=task.get_stream(which, monitor=True),
                             prefix=which,
                             after_epoch=True)
        for which in "train valid test".split())

    # Always bind patchmonitor_klass: previously it stayed undefined for
    # any n_spatial_dims other than 2 or 3 and the check below raised
    # UnboundLocalError.
    if n_spatial_dims == 2:
        patchmonitor_klass = PatchMonitoring
    elif n_spatial_dims == 3:
        patchmonitor_klass = VideoPatchMonitoring
    else:
        patchmonitor_klass = None

    if patchmonitor_klass:
        patch = T.stack(*[
            ram.crop(x, x_shape, location[:, i, :], scale[:, i, :])
            for i in xrange(n_patches)
        ])
        # Swap the first two axes of the stacked result.
        patch = patch.dimshuffle(1, 0, *range(2, patch.ndim))
        patch_extractor = theano.function([x, x_shape],
                                          [location, scale, patch])

        for which in "train valid".split():
            patchmonitor = patchmonitor_klass(
                save_to="%s_patches_%s" % (name, which),
                data_stream=task.get_stream(which,
                                            shuffle=False,
                                            num_examples=5),
                every_n_batches=patchmonitor_interval,
                extractor=patch_extractor,
                map_to_input_space=attention.static_map_to_input_space)
            # Called once per monitor at construction time; both
            # iterations use the same file name.
            patchmonitor.save_patches("patchmonitor_test.png")
            extensions.append(patchmonitor)

    if plot_url:
        plot_channels = []
        plot_channels.extend(task.plot_channels())
        plot_channels.append(["train_cost"])
        #plot_channels.append(["train_%s" % step_channel.name for step_channel in step_channels])

        # Imported only when plotting is actually requested.
        from blocks.extras.extensions.plot import Plot
        extensions.append(
            Plot(name,
                 channels=plot_channels,
                 after_epoch=True,
                 server_url=plot_url))

    return extensions