Example #1
 def simple_assertions(self, updates, num_bricks=2, num_updates=4):
     """Shared assertions for simple tests."""
     assert len(updates) == num_updates
     assert all(is_shared_variable(u[0]) for u in updates)
     # This order is somewhat arbitrary and implementation-dependent
     means = set(u[0] for u in updates
                 if has_roles(u[0], [BATCH_NORM_POPULATION_MEAN]))
     stdevs = set(u[0] for u in updates
                  if has_roles(u[0], [BATCH_NORM_POPULATION_STDEV]))
     assert means.isdisjoint(stdevs)
     assert len(set(get_brick(v) for v in means)) == num_bricks
     assert len(set(get_brick(v) for v in stdevs)) == num_bricks
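For context: every snippet in this listing queries variables by the roles that Blocks attaches to them. Below is a minimal, self-contained sketch of that mechanism (assuming the blocks.roles API as used in these examples; the variable names are illustrative, not taken from the snippets):

import numpy
import theano
from blocks.roles import add_role, has_roles, BIAS, PARAMETER, WEIGHT

# Tag a shared variable with the WEIGHT role, as a brick would during allocation.
W = theano.shared(numpy.zeros((3, 3)), name='W')
add_role(W, WEIGHT)

assert has_roles(W, [WEIGHT])
# WEIGHT is a subrole of PARAMETER, so querying the parent role also matches.
assert has_roles(W, [PARAMETER])
# The optional third argument requires every listed role to be present.
assert not has_roles(W, [WEIGHT, BIAS], True)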
Example #2
    def get_updates(variables):
        # this is fugly because we must get the batch stats from the
        # graph so we get the ones that are *actually being used in
        # the computation* after graph transforms have been applied
        updates = []
        variables = graph.deep_ancestors(variables)
        for stat, role in BatchNormalization.roles.items():
            from blocks.roles import has_roles
            batch_stats = [var for var in variables if has_roles(var, [role])]
            batch_stats = util.dedup(batch_stats, equal=util.equal_computations)

            batch_stats_by_brick = OrderedDict()
            for batch_stat in batch_stats:
                brick = batch_stat.tag.batch_normalization_brick
                population_stat = brick.population_stats[stat]
                batch_stats_by_brick.setdefault(brick, []).append(batch_stat)

            for brick, batch_stats in batch_stats_by_brick.items():
                population_stat = brick.population_stats[stat]
                if len(batch_stats) > 1:
                    # makes sense for recurrent structures
                    logger.warning("averaging multiple population statistic estimates to update %s: %s"
                                   % (util.get_path(population_stat), batch_stats))
                batch_stat = T.stack(batch_stats).mean(axis=0)
                updates.append((population_stat,
                                (1 - brick.alpha) * population_stat
                                + brick.alpha * batch_stat))
        return updates
Example #3
    def tag_recurrent_dropout(self,
                              variables,
                              recurrent_dropout,
                              rng=None,
                              **hyperparameters):
        from blocks.roles import OUTPUT, has_roles
        ancestors = graph.deep_ancestors(variables)
        for lstm in self.rnn.transitions:
            variables = [
                var for var in ancestors
                if (has_roles(var, [OUTPUT]) and lstm in var.tag.annotations
                    and var.name.endswith("states"))
            ]

            # get one dropout mask for all time steps.  use the very
            # first state to get the hidden state shape, else we get
            # graph cycles.
            initial_state = util.the(
                [var for var in variables if "initial_state" in var.name])
            mask = util.get_dropout_mask(initial_state.shape,
                                         recurrent_dropout,
                                         rng=rng)

            subsequent_states = [
                var for var in variables if "initial_state" not in var.name
            ]
            graph.add_transform(subsequent_states,
                                graph.DropoutTransform("recurrent_dropout",
                                                       mask=mask),
                                reason="regularization")
Example #4
    def __call__(self, variables):
        """Filter the given variables.

        Parameters
        ----------
        variables : list of :class:`~tensor.TensorVariable`

        """
        if self.roles:
            variables = [var for var in variables
                         if has_roles(var, self.roles, self.each_role)]
        if self.bricks is not None:
            filtered_variables = []
            for var in variables:
                var_brick = get_brick(var)
                if var_brick is None:
                    continue
                for brick in self.bricks:
                    if isclass(brick) and isinstance(var_brick, brick):
                        filtered_variables.append(var)
                        break
                    elif isinstance(brick, Brick) and var_brick is brick:
                        filtered_variables.append(var)
                        break
            variables = filtered_variables
        if self.name:
            variables = [var for var in variables
                         if hasattr(var.tag, 'name') and
                         self.name == var.tag.name]
        if self.name_regex:
            variables = [var for var in variables
                         if hasattr(var.tag, 'name') and
                         re.match(self.name_regex, var.tag.name)]
        if self.theano_name:
            variables = [var for var in variables
                         if (var.name is not None) and
                         self.theano_name == var.name]
        if self.theano_name_regex:
            variables = [var for var in variables
                         if (var.name is not None) and
                         re.match(self.theano_name_regex, var.name)]
        if self.applications:
            filtered_variables = []
            for var in variables:
                var_application = get_application_call(var)
                if var_application is None:
                    continue
                if (var_application.application in
                        self.applications or
                        var_application.application.application in
                        self.applications):
                    filtered_variables.append(var)
            variables = filtered_variables
        if self.call_id:
            variables = [
                var for var in variables
                if get_application_call(var) and
                get_application_call(var).metadata['call_id'] == self.call_id]
        return variables
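The __call__ method above implements the filtering logic of Blocks' VariableFilter. A minimal usage sketch (assuming the standard Blocks API; the Linear brick and names below are illustrative, not taken from the examples):

from theano import tensor
from blocks.bricks import Linear
from blocks.filter import VariableFilter
from blocks.graph import ComputationGraph
from blocks.roles import WEIGHT

x = tensor.matrix('x')
linear = Linear(input_dim=4, output_dim=3, name='linear')
y = linear.apply(x)
cg = ComputationGraph([y])

# Pick out the weight matrix of the Linear brick from the whole graph.
weights = VariableFilter(roles=[WEIGHT], bricks=[linear])(cg.variables)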
Example #5
    def get_gradients(self, features, n_samples):
        """Perform inference and calculate gradients.

        Returns
        -------
        log_px : T.fvector
        log_psx : T.fvector
        gradients : OrderedDict
        """
        p_layers = self.p_layers
        q_layers = self.q_layers
        n_layers = len(p_layers)

        batch_size = features.shape[0]

        x = replicate_batch(features, n_samples)

        # Get Q-samples
        samples, log_p, log_q = self.sample_q(x)

        # Reshape and sum
        samples = unflatten_values(samples, batch_size, n_samples)
        log_p = unflatten_values(log_p, batch_size, n_samples)
        log_q = unflatten_values(log_q, batch_size, n_samples)

        log_p_all = sum(log_p)
        log_q_all = sum(log_q)

        # Approximate log(p(x))
        log_px = logsumexp(log_p_all - log_q_all, axis=-1) - tensor.log(n_samples)
        log_psx = (logsumexp((log_p_all - log_q_all) / 2, axis=-1) - tensor.log(n_samples)) * 2.

        # Approximate log p(x) and calculate IS weights
        w = self.importance_weights(log_p, log_q)

        wp = w.reshape((batch_size * n_samples, ))
        wq = w.reshape((batch_size * n_samples, ))
        wq = wq - (1. / n_samples)

        samples = flatten_values(samples, batch_size * n_samples)

        gradients = OrderedDict()
        for l in xrange(n_layers - 1):
            gradients = merge_gradients(gradients, p_layers[l].get_gradients(samples[l], samples[l + 1], weights=wp))
            gradients = merge_gradients(gradients, q_layers[l].get_gradients(samples[l + 1], samples[l], weights=wq))
        gradients = merge_gradients(gradients, p_layers[-1].get_gradients(samples[-1], weights=wp))

        if (self.l1reg > 0.) or (self.l2reg > 0.):
            reg_gradients = OrderedDict()
            params = Selector(self).get_parameters()
            for pname, param in params.iteritems():
                if has_roles(param, (WEIGHT,)):
                    reg_cost = self.l1reg * tensor.sum(abs(param)) + self.l2reg * tensor.sum(param ** 2)
                    reg_gradients[param] = tensor.grad(reg_cost, param)
            gradients = merge_gradients(gradients, reg_gradients)

        return log_px, log_psx, gradients
Example #6
    def __call__(self, variables):
        """Filter the given variables.

        Parameters
        ----------
        variables : list of :class:`~tensor.TensorVariable`

        """
        if self.roles:
            variables = [
                var for var in variables
                if has_roles(var, self.roles, self.each_role)
            ]
        if self.bricks is not None:
            filtered_variables = []
            for var in variables:
                var_brick = get_brick(var)
                if var_brick is None:
                    continue
                for brick in self.bricks:
                    if isclass(brick) and isinstance(var_brick, brick):
                        filtered_variables.append(var)
                        break
                    elif isinstance(brick, Brick) and var_brick is brick:
                        filtered_variables.append(var)
                        break
            variables = filtered_variables
        if self.name:
            variables = [
                var for var in variables
                if hasattr(var.tag, 'name') and self.name == var.tag.name
            ]
        if self.name_regex:
            variables = [
                var for var in variables if hasattr(var.tag, 'name')
                and re.match(self.name_regex, var.tag.name)
            ]
        if self.theano_name:
            variables = [
                var for var in variables
                if (var.name is not None) and self.theano_name == var.name
            ]
        if self.theano_name_regex:
            variables = [
                var for var in variables if (var.name is not None)
                and re.match(self.theano_name_regex, var.name)
            ]
        if self.applications:
            variables = [
                var for var in variables if get_application_call(var)
                and get_application_call(var).application in self.applications
            ]
        return variables
Example #7
def tag_convnet_dropout(outputs, rng=None, **kwargs):
    from blocks.roles import has_roles, OUTPUT
    cnn_outputs = OrderedDict()
    for var in theano.gof.graph.ancestors(outputs):
        if (has_roles(var, [OUTPUT])
                and util.annotated_by_a(util.get_convolution_classes(), var)):
            cnn_outputs.setdefault(util.get_path(var), []).append(var)
    unique_outputs = []
    for path, vars in cnn_outputs.items():
        vars = util.dedup(vars, equal=util.equal_computations)
        unique_outputs.extend(vars)
    graph.add_transform(unique_outputs,
                        graph.DropoutTransform("convnet_dropout", rng=rng),
                        reason="regularization")
Example #8
def tag_convnet_dropout(outputs, rng=None, **kwargs):
    from blocks.roles import has_roles, OUTPUT
    cnn_outputs = OrderedDict()
    for var in theano.gof.graph.ancestors(outputs):
        if (has_roles(var, [OUTPUT]) and util.annotated_by_a(
                util.get_convolution_classes(), var)):
            cnn_outputs.setdefault(util.get_path(var), []).append(var)
    unique_outputs = []
    for path, vars in cnn_outputs.items():
        vars = util.dedup(vars, equal=util.equal_computations)
        unique_outputs.append(util.the(vars))
    graph.add_transform(
        unique_outputs,
        graph.DropoutTransform("convnet_dropout", rng=rng),
        reason="regularization")
Example #9
 def tag_attention_dropout(self, variables, rng=None, **hyperparameters):
     from blocks.roles import INPUT, has_roles
     bricks_ = [
         brick for brick in util.all_bricks([self.patch_transform])
         if isinstance(brick, (bricks.Linear, conv2d.Convolutional,
                               conv3d.Convolutional))
     ]
     variables = [
         var for var in graph.deep_ancestors(variables)
         if (has_roles(var, [INPUT]) and any(brick in var.tag.annotations
                                             for brick in bricks_))
     ]
     graph.add_transform(variables,
                         graph.DropoutTransform("attention_dropout",
                                                rng=rng),
                         reason="regularization")
Example #10
    def get_gradients(self, features, n_samples):
        """Perform inference and calculate gradients.

        Returns
        -------
            log_px : T.fvector
            log_psx : T.fvector
            gradients : OrderedDict
        """
        p_layers = self.p_layers
        q_layers = self.q_layers
        n_layers = len(p_layers)

        batch_size = features.shape[0]

        x = replicate_batch(features, n_samples)

        # Get Q-samples
        samples, log_p, log_q = self.sample_q(x)

        # Reshape and sum
        samples = unflatten_values(samples, batch_size, n_samples)
        log_p = unflatten_values(log_p, batch_size, n_samples)
        log_q = unflatten_values(log_q, batch_size, n_samples)

        log_p_all = sum(log_p)
        log_q_all = sum(log_q)

        # Approximate log p(x)
        log_px_bound = log_p_all[:,0] - log_q_all[:,0]
        log_px  = logsumexp(log_p_all-log_q_all, axis=-1) - tensor.log(n_samples)
        log_psx = (logsumexp((log_p_all-log_q_all)/2, axis=-1) - tensor.log(n_samples)) * 2.

        # Calculate IS weights
        w = self.importance_weights(log_p, log_q)

        wp = w.reshape( (batch_size*n_samples, ) )
        wq = w.reshape( (batch_size*n_samples, ) )
        wq = wq - (1./n_samples)

        samples = flatten_values(samples, batch_size*n_samples)

        gradients = OrderedDict()
        for l in xrange(n_layers-1):
            gradients = merge_gradients(gradients, p_layers[l].get_gradients(samples[l], samples[l+1], weights=wp))
            gradients = merge_gradients(gradients, q_layers[l].get_gradients(samples[l+1], samples[l], weights=wq))
        gradients = merge_gradients(gradients, p_layers[-1].get_gradients(samples[-1], weights=wp))

        if (self.l1reg > 0.) or (self.l2reg > 0.):
            reg_gradients = OrderedDict()
            params = Selector(self).get_parameters()
            for pname, param in params.iteritems():
                if has_roles(param, (WEIGHT,)):
                    reg_cost = self.l1reg * tensor.sum(abs(param)) + self.l2reg * tensor.sum(param**2)
                    reg_gradients[param] = tensor.grad(reg_cost, param)
            gradients = merge_gradients(gradients, reg_gradients)

        self.log_p_bound = log_px_bound
        self.log_p = log_px
        self.log_ph = log_psx

        return log_px, log_psx, gradients
Example #11
 def parameters(self):
     return [
         var for var in self.shared_variables
         if has_roles(var, [PARAMETER])
     ]
Example #12
    def replace(self, replacements):
        """Replace certain variables in the computation graph.

        Parameters
        ----------
        replacements : dict
            The mapping from variables to be replaced to the corresponding
            substitutes.

        Examples
        --------
        >>> import theano
        >>> from theano import tensor, function
        >>> x = tensor.scalar('x')
        >>> y = x + 2
        >>> z = y + 3
        >>> a = z + 5

        Let's suppose we have dependent replacements like

        >>> replacements = {y: x * 2, z: y * 3}
        >>> cg = ComputationGraph([a])
        >>> theano.pprint(a)  # doctest: +NORMALIZE_WHITESPACE
        '(((x + TensorConstant{2}) + TensorConstant{3}) +
        TensorConstant{5})'
        >>> cg_new = cg.replace(replacements)
        >>> theano.pprint(
        ...     cg_new.outputs[0])  # doctest: +NORMALIZE_WHITESPACE
        '(((x * TensorConstant{2}) * TensorConstant{3}) +
        TensorConstant{5})'

        First two sums turned into multiplications

        >>> float(function(cg_new.inputs, cg_new.outputs)(3.)[0])
        23.0

        """
        # Due to theano specifics we have to make one replacement at a time
        replacements = OrderedDict(replacements)

        outputs_cur = self.outputs

        # `replacements` with previous replacements applied. We have to track
        # variables in the new graph corresponding to original replacements.
        replacement_keys_cur = []
        replacement_vals_cur = []
        # Sort `replacements` in topological order
        # variables in self.variables are in topological order
        remaining_replacements = replacements.copy()
        for variable in self.variables:
            if variable in replacements:
                if has_roles(variable, [AUXILIARY]):
                    warnings.warn(
                        "replace method was asked to replace a variable ({}) "
                        "that is an auxiliary variable.".format(variable))
                replacement_keys_cur.append(variable)
                # self.variables should not contain duplicates,
                # otherwise pop() may fail.
                replacement_vals_cur.append(
                    remaining_replacements.pop(variable))

        # if remaining_replacements is not empty
        if remaining_replacements:
            warnings.warn(
                "replace method was asked to replace a variable(s) ({}) "
                "that is not a part of the computational "
                "graph.".format(str(remaining_replacements.keys())))

        # Replace step-by-step in topological order
        while replacement_keys_cur:
            replace_what = replacement_keys_cur[0]
            replace_by = replacement_vals_cur[0]
            # We also want to make changes in future replacements
            outputs_new = theano.clone(outputs_cur + replacement_keys_cur[1:] +
                                       replacement_vals_cur[1:],
                                       replace={replace_what: replace_by})
            # Reconstruct outputs, keys, and values
            outputs_cur = outputs_new[:len(outputs_cur)]
            replacement_keys_cur = outputs_new[len(outputs_cur
                                                   ):len(outputs_cur) +
                                               len(replacement_keys_cur) - 1]
            replacement_vals_cur = outputs_new[len(outputs_cur) +
                                               len(replacement_keys_cur):]

        return ComputationGraph(outputs_cur)
Example #13
 def auxiliary_variables(self):
     return [var for var in self.variables if has_roles(var, [AUXILIARY])]
Example #14
 def parameters(self):
     return [var for var in self.shared_variables
             if has_roles(var, [PARAMETER])]
Example #15
def construct_monitors(algorithm, task, model, graphs, outputs,
                       updates, monitor_options, n_spatial_dims,
                       hyperparameters, **kwargs):
    from blocks.extensions.monitoring import TrainingDataMonitoring, DataStreamMonitoring

    extensions = []

    if "steps" in monitor_options:
        step_channels = []
        step_channels.extend([
            algorithm.steps[param].norm(2).copy(name="step_norm:%s" % name)
            for name, param in model.get_parameter_dict().items()])
        step_channels.append(algorithm.total_step_norm.copy(name="total_step_norm"))
        step_channels.append(algorithm.total_gradient_norm.copy(name="total_gradient_norm"))
        logger.warning("constructing training data monitor")
        extensions.append(TrainingDataMonitoring(
            step_channels, prefix="train", after_epoch=True))

    if "parameters" in monitor_options:
        data_independent_channels = []
        for parameter in graphs["train"].parameters:
            if parameter.name in "gamma beta W b".split():
                quantity = parameter.norm(2)
                quantity.name = "parameter.norm:%s" % util.get_path(parameter)
                data_independent_channels.append(quantity)
        for key in "location_std scale_std".split():
            data_independent_channels.append(hyperparameters[key].copy(name="parameter:%s" % key))
        extensions.append(DataStreamMonitoring(
            data_independent_channels, data_stream=None, after_epoch=True))

    for which_set in "train test".split():
        channels = []
        channels.extend(outputs[which_set][key] for key in
                        "cost emitter_cost excursion_cost".split())
        channels.extend(outputs[which_set][key] for key in
                        task.monitor_outputs())
        channels.append(outputs[which_set]["savings"]
                        .mean().copy(name="mean_savings"))

        if "theta" in monitor_options:
            for key in "raw_location raw_scale".split():
                for stat in "mean var".split():
                    channels.append(getattr(outputs[which_set][key], stat)(axis=1)
                                    .copy(name="%s.%s" % (key, stat)))
        if which_set == "train":
            if "activations" in monitor_options:
                from blocks.roles import has_roles, OUTPUT
                cnn_outputs = OrderedDict()
                for var in theano.gof.graph.ancestors(graphs[which_set].outputs):
                    if (has_roles(var, [OUTPUT]) and util.annotated_by_a(
                            util.get_convolution_classes(), var)):
                        cnn_outputs.setdefault(util.get_path(var), []).append(var)
                for path, vars in cnn_outputs.items():
                    vars = util.dedup(vars, equal=util.equal_computations)
                    for i, var in enumerate(vars):
                        channels.append(var.mean().copy(
                            name="activation[%i].mean:%s" % (i, path)))

        if "batch_normalization" in monitor_options:
            errors = []
            for population_stat, update in updates[which_set]:
                if population_stat.name.startswith("population"):
                    # this is a super robust way to get the
                    # corresponding batch statistic from the
                    # exponential moving average expression
                    batch_stat = update.owner.inputs[1].owner.inputs[1]
                    errors.append(((population_stat - batch_stat)**2).mean())
            if errors:
                channels.append(T.stack(errors).mean().copy(name="population_statistic_mse"))

        logger.warning("constructing %s monitor" % which_set)
        extensions.append(DataStreamMonitoring(
            channels, prefix=which_set, after_epoch=True,
            data_stream=task.get_stream(which_set, monitor=True)))

    return extensions
Example #16
def construct_monitors(algorithm, task, model, graphs, outputs, plot_url,
                       hyperparameters, **kwargs):
    from blocks.extensions.monitoring import TrainingDataMonitoring, DataStreamMonitoring
    from patchmonitor import PatchMonitoring, VideoPatchMonitoring

    extensions = []

    if True:
        extensions.append(
            TrainingDataMonitoring([
                algorithm.steps[param].norm(2).copy(name="step_norm:%s" % name)
                for name, param in model.get_parameter_dict().items()
            ],
                                   prefix="train",
                                   after_epoch=True))

    if True:
        data_independent_channels = []
        for parameter in graphs["train"].parameters:
            if parameter.name in "gamma beta W b".split():
                quantity = parameter.norm(2)
                quantity.name = "parameter.norm:%s" % util.get_path(parameter)
                data_independent_channels.append(quantity)
        extensions.append(
            DataStreamMonitoring(data_independent_channels,
                                 data_stream=None,
                                 after_epoch=True))

    for which_set in "train valid test".split():
        channels = []
        channels.extend(outputs[which_set][key] for key in "cost".split())
        channels.extend(outputs[which_set][key]
                        for key in task.monitor_outputs())
        if which_set == "train":
            if True:
                from blocks.roles import has_roles, OUTPUT
                cnn_outputs = OrderedDict()
                for var in theano.gof.graph.ancestors(
                        graphs[which_set].outputs):
                    if (has_roles(var, [OUTPUT]) and util.annotated_by_a(
                            util.get_convolution_classes(), var)):
                        cnn_outputs.setdefault(util.get_path(var),
                                               []).append(var)
                for path, vars in cnn_outputs.items():
                    vars = util.dedup(vars, equal=util.equal_computations)
                    for i, var in enumerate(vars):
                        channels.append(var.mean().copy(
                            name="activation[%i].mean:%s" % (i, path)))

            channels.append(
                algorithm.total_gradient_norm.copy(name="total_gradient_norm"))
        extensions.append(
            DataStreamMonitoring(channels,
                                 prefix=which_set,
                                 after_epoch=True,
                                 data_stream=task.get_stream(which_set,
                                                             monitor=True)))

    if plot_url:
        plot_channels = []
        plot_channels.extend(task.plot_channels())
        plot_channels.append(["train_cost"])
        #plot_channels.append(["train_%s" % step_channel.name for step_channel in step_channels])

        from blocks.extras.extensions.plot import Plot
        extensions.append(
            Plot(name,
                 channels=plot_channels,
                 after_epoch=True,
                 server_url=plot_url))

    return extensions
Example #17
 def parameters(self):
     all_parameters = list(chain(
         *[i.parameters for i in self.ops if hasattr(i, 'parameters')]))
     return [i for i in all_parameters if has_roles(i, PARAMETER)]
Example #18
 def parameters(self):
     return [i for i in self._configuration.parameters if has_roles(i, PARAMETER)]
Example #19
 def _initialize(self):
     for param in self.params:
         if has_roles(param, [WEIGHT]):
             self.weights_init.initialize(param, self.rng)
         elif has_roles(param, [BIAS]):
             self.biases_init.initialize(param, self.rng)
Example #20
def construct_monitors(algorithm, task, model, graphs, outputs,
                       updates, monitor_options, n_spatial_dims,
                       plot_url, hyperparameters,
                       patchmonitor_interval, **kwargs):
    from blocks.extensions.monitoring import TrainingDataMonitoring, DataStreamMonitoring

    extensions = []

    if "steps" in monitor_options:
        step_channels = []
        step_channels.extend([
            algorithm.steps[param].norm(2).copy(name="step_norm:%s" % name)
            for name, param in model.get_parameter_dict().items()])
        step_channels.append(algorithm.total_step_norm.copy(name="total_step_norm"))
        step_channels.append(algorithm.total_gradient_norm.copy(name="total_gradient_norm"))

        from extensions import Compressor
        for step_rule in algorithm.step_rule.components:
            if isinstance(step_rule, Compressor):
                step_channels.append(step_rule.norm.copy(name="compressor.norm"))
                step_channels.append(step_rule.newnorm.copy(name="compressor.newnorm"))
                step_channels.append(step_rule.median.copy(name="compressor.median"))
                step_channels.append(step_rule.ratio.copy(name="compressor.ratio"))

        step_channels.extend(outputs["train"][key] for key in
                             "cost emitter_cost excursion_cost cross_entropy error_rate".split())

        step_channels.extend(util.uniqueify_names_last_resort(util.dedup(
            (var.mean().copy(name="bn_stat:%s" % util.get_path(var))
             for var in graph.deep_ancestors([outputs["train"]["cost"]])
             if hasattr(var.tag, "batch_normalization_brick")),
            equal=util.equal_computations)))

        logger.warning("constructing training data monitor")
        extensions.append(TrainingDataMonitoring(
            step_channels, prefix="iteration", after_batch=True))

    if "parameters" in monitor_options:
        data_independent_channels = []
        for parameter in graphs["train"].parameters:
            if parameter.name in "gamma beta W b".split():
                quantity = parameter.norm(2)
                quantity.name = "parameter.norm:%s" % util.get_path(parameter)
                data_independent_channels.append(quantity)
        for key in "location_std scale_std".split():
            data_independent_channels.append(hyperparameters[key].copy(name="parameter:%s" % key))
        extensions.append(DataStreamMonitoring(
            data_independent_channels, data_stream=None, after_epoch=True))

    for which_set in "train valid test".split():
        channels = []
        channels.extend(outputs[which_set][key] for key in
                        "cost emitter_cost excursion_cost".split())
        channels.extend(outputs[which_set][key] for key in
                        task.monitor_outputs())
        channels.append(outputs[which_set]["savings"]
                        .mean().copy(name="mean_savings"))

        if "theta" in monitor_options:
            for key in "true_scale raw_location raw_scale".split():
                for stat in "mean var".split():
                    channels.append(getattr(outputs[which_set][key], stat)(axis=1)
                                    .copy(name="%s.%s" % (key, stat)))
        if which_set == "train":
            if "activations" in monitor_options:
                from blocks.roles import has_roles, OUTPUT
                cnn_outputs = OrderedDict()
                for var in theano.gof.graph.ancestors(graphs[which_set].outputs):
                    if (has_roles(var, [OUTPUT]) and util.annotated_by_a(
                            util.get_convolution_classes(), var)):
                        cnn_outputs.setdefault(util.get_path(var), []).append(var)
                for path, vars in cnn_outputs.items():
                    vars = util.dedup(vars, equal=util.equal_computations)
                    for i, var in enumerate(vars):
                        channels.append(var.mean().copy(
                            name="activation[%i].mean:%s" % (i, path)))

        if "batch_normalization" in monitor_options:
            errors = []
            for population_stat, update in updates[which_set]:
                if population_stat.name.startswith("population"):
                    # this is a super robust way to get the
                    # corresponding batch statistic from the
                    # exponential moving average expression
                    batch_stat = update.owner.inputs[1].owner.inputs[1]
                    errors.append(((population_stat - batch_stat)**2).mean())
            if errors:
                channels.append(T.stack(errors).mean().copy(name="population_statistic_mse"))

        logger.warning("constructing %s monitor" % which_set)
        extensions.append(DataStreamMonitoring(
            channels, prefix=which_set, after_epoch=True,
            data_stream=task.get_stream(which_set, monitor=True)))

    if "patches" in monitor_options:
        from patchmonitor import PatchMonitoring, VideoPatchMonitoring

        patchmonitor = None
        if n_spatial_dims == 2:
            patchmonitor_klass = PatchMonitoring
        elif n_spatial_dims == 3:
            patchmonitor_klass = VideoPatchMonitoring

        if patchmonitor_klass:
            for which in "train valid".split():
                patch = outputs[which]["patch"]
                patch = patch.dimshuffle(1, 0, *range(2, patch.ndim))
                patch_extractor = theano.function(
                    [outputs[which][key] for key in "x x_shape".split()],
                    [outputs[which][key] for key in "raw_location raw_scale".split()] + [patch])

                patchmonitor = patchmonitor_klass(
                    save_to="%s_patches_%s" % (hyperparameters["name"], which),
                    data_stream=task.get_stream(which, shuffle=False, num_examples=10),
                    every_n_batches=patchmonitor_interval,
                    extractor=patch_extractor,
                    map_to_input_space=attention.static_map_to_input_space)
                patchmonitor.save_patches("patchmonitor_test.png")
                extensions.append(patchmonitor)

    if plot_url:
        plot_channels = []
        plot_channels.extend(task.plot_channels())
        plot_channels.append(["train_cost"])
        #plot_channels.append(["train_%s" % step_channel.name for step_channel in step_channels])

        from blocks.extras.extensions.plot import Plot
        extensions.append(Plot(name, channels=plot_channels,
                            after_epoch=True, server_url=plot_url))

    return extensions
Example #21
def construct_monitors(algorithm, task, model, graphs, outputs, updates,
                       monitor_options, n_spatial_dims, plot_url,
                       hyperparameters, patchmonitor_interval, **kwargs):
    from blocks.extensions.monitoring import TrainingDataMonitoring, DataStreamMonitoring

    extensions = []

    if "steps" in monitor_options:
        step_channels = []
        step_channels.extend([
            algorithm.steps[param].norm(2).copy(name="step_norm:%s" % name)
            for name, param in model.get_parameter_dict().items()
        ])
        step_channels.append(
            algorithm.total_step_norm.copy(name="total_step_norm"))
        step_channels.append(
            algorithm.total_gradient_norm.copy(name="total_gradient_norm"))

        from extensions import Compressor
        for step_rule in algorithm.step_rule.components:
            if isinstance(step_rule, Compressor):
                step_channels.append(
                    step_rule.norm.copy(name="compressor.norm"))
                step_channels.append(
                    step_rule.newnorm.copy(name="compressor.newnorm"))
                step_channels.append(
                    step_rule.median.copy(name="compressor.median"))
                step_channels.append(
                    step_rule.ratio.copy(name="compressor.ratio"))

        step_channels.extend(
            outputs["train"][key] for key in
            "cost emitter_cost excursion_cost cross_entropy error_rate".split(
            ))

        step_channels.extend(
            util.uniqueify_names_last_resort(
                util.dedup((
                    var.mean().copy(name="bn_stat:%s" % util.get_path(var))
                    for var in graph.deep_ancestors([outputs["train"]["cost"]])
                    if hasattr(var.tag, "batch_normalization_brick")),
                           equal=util.equal_computations)))

        logger.warning("constructing training data monitor")
        extensions.append(
            TrainingDataMonitoring(step_channels,
                                   prefix="iteration",
                                   after_batch=True))

    if "parameters" in monitor_options:
        data_independent_channels = []
        for parameter in graphs["train"].parameters:
            if parameter.name in "gamma beta W b".split():
                quantity = parameter.norm(2)
                quantity.name = "parameter.norm:%s" % util.get_path(parameter)
                data_independent_channels.append(quantity)
        for key in "location_std scale_std".split():
            data_independent_channels.append(
                hyperparameters[key].copy(name="parameter:%s" % key))
        extensions.append(
            DataStreamMonitoring(data_independent_channels,
                                 data_stream=None,
                                 after_epoch=True))

    for which_set in "train valid test".split():
        channels = []
        channels.extend(outputs[which_set][key]
                        for key in "cost emitter_cost excursion_cost".split())
        channels.extend(outputs[which_set][key]
                        for key in task.monitor_outputs())
        channels.append(
            outputs[which_set]["savings"].mean().copy(name="mean_savings"))

        if "theta" in monitor_options:
            for key in "true_scale raw_location raw_scale".split():
                for stat in "mean var".split():
                    channels.append(
                        getattr(outputs[which_set][key],
                                stat)(axis=1).copy(name="%s.%s" % (key, stat)))
        if which_set == "train":
            if "activations" in monitor_options:
                from blocks.roles import has_roles, OUTPUT
                cnn_outputs = OrderedDict()
                for var in theano.gof.graph.ancestors(
                        graphs[which_set].outputs):
                    if (has_roles(var, [OUTPUT]) and util.annotated_by_a(
                            util.get_convolution_classes(), var)):
                        cnn_outputs.setdefault(util.get_path(var),
                                               []).append(var)
                for path, vars in cnn_outputs.items():
                    vars = util.dedup(vars, equal=util.equal_computations)
                    for i, var in enumerate(vars):
                        channels.append(var.mean().copy(
                            name="activation[%i].mean:%s" % (i, path)))

        if "batch_normalization" in monitor_options:
            errors = []
            for population_stat, update in updates[which_set]:
                if population_stat.name.startswith("population"):
                    # this is a super robust way to get the
                    # corresponding batch statistic from the
                    # exponential moving average expression
                    batch_stat = update.owner.inputs[1].owner.inputs[1]
                    errors.append(((population_stat - batch_stat)**2).mean())
            if errors:
                channels.append(
                    T.stack(errors).mean().copy(
                        name="population_statistic_mse"))

        logger.warning("constructing %s monitor" % which_set)
        extensions.append(
            DataStreamMonitoring(channels,
                                 prefix=which_set,
                                 after_epoch=True,
                                 data_stream=task.get_stream(which_set,
                                                             monitor=True)))

    if "patches" in monitor_options:
        from patchmonitor import PatchMonitoring, VideoPatchMonitoring

        patchmonitor = None
        if n_spatial_dims == 2:
            patchmonitor_klass = PatchMonitoring
        elif n_spatial_dims == 3:
            patchmonitor_klass = VideoPatchMonitoring

        if patchmonitor_klass:
            for which in "train valid".split():
                patch = outputs[which]["patch"]
                patch = patch.dimshuffle(1, 0, *range(2, patch.ndim))
                patch_extractor = theano.function(
                    [outputs[which][key] for key in "x x_shape".split()], [
                        outputs[which][key]
                        for key in "raw_location raw_scale".split()
                    ] + [patch])

                patchmonitor = patchmonitor_klass(
                    save_to="%s_patches_%s" % (hyperparameters["name"], which),
                    data_stream=task.get_stream(which,
                                                shuffle=False,
                                                num_examples=10),
                    every_n_batches=patchmonitor_interval,
                    extractor=patch_extractor,
                    map_to_input_space=attention.static_map_to_input_space)
                patchmonitor.save_patches("patchmonitor_test.png")
                extensions.append(patchmonitor)

    if plot_url:
        plot_channels = []
        plot_channels.extend(task.plot_channels())
        plot_channels.append(["train_cost"])
        #plot_channels.append(["train_%s" % step_channel.name for step_channel in step_channels])

        from blocks.extras.extensions.plot import Plot
        extensions.append(
            Plot(name,
                 channels=plot_channels,
                 after_epoch=True,
                 server_url=plot_url))

    return extensions
Example #22
    def replace(self, replacements):
        """Replace certain variables in the computation graph.

        Parameters
        ----------
        replacements : dict
            The mapping from variables to be replaced to the corresponding
            substitutes.

        Examples
        --------
        >>> import theano
        >>> from theano import tensor, function
        >>> x = tensor.scalar('x')
        >>> y = x + 2
        >>> z = y + 3
        >>> a = z + 5

        Let's suppose we have dependent replacements like

        >>> replacements = {y: x * 2, z: y * 3}
        >>> cg = ComputationGraph([a])
        >>> theano.pprint(a)  # doctest: +NORMALIZE_WHITESPACE
        '(((x + TensorConstant{2}) + TensorConstant{3}) +
        TensorConstant{5})'
        >>> cg_new = cg.replace(replacements)
        >>> theano.pprint(
        ...     cg_new.outputs[0])  # doctest: +NORMALIZE_WHITESPACE
        '(((x * TensorConstant{2}) * TensorConstant{3}) +
        TensorConstant{5})'

        First two sums turned into multiplications

        >>> float(function(cg_new.inputs, cg_new.outputs)(3.)[0])
        23.0

        """
        # Due to theano specifics we have to make one replacement at a time
        replacements = OrderedDict(replacements)

        outputs_cur = self.outputs

        # `replacements` with previous replacements applied. We have to track
        # variables in the new graph corresponding to original replacements.
        replacement_keys_cur = []
        replacement_vals_cur = []
        # Sort `replacements` in topological order
        # variables in self.variables are in topological order
        remaining_replacements = replacements.copy()
        for variable in self.variables:
            if variable in replacements:
                if has_roles(variable, [AUXILIARY]):
                    warnings.warn(
                        "replace method was asked to replace a variable ({}) "
                        "that is an auxiliary variable.".format(variable))
                replacement_keys_cur.append(variable)
                # self.variables should not contain duplicates,
                # otherwise pop() may fail.
                replacement_vals_cur.append(
                    remaining_replacements.pop(variable))

        # if remaining_replacements is not empty
        if remaining_replacements:
            warnings.warn(
                "replace method was asked to replace a variable(s) ({}) "
                "that is not a part of the computational "
                "graph.".format(str(remaining_replacements.keys())))

        # Replace step-by-step in topological order
        while replacement_keys_cur:
            replace_what = replacement_keys_cur[0]
            replace_by = replacement_vals_cur[0]
            # We also want to make changes in future replacements
            outputs_new = theano.clone(
                outputs_cur + replacement_keys_cur[1:] +
                replacement_vals_cur[1:],
                replace={replace_what: replace_by})
            # Reconstruct outputs, keys, and values
            outputs_cur = outputs_new[:len(outputs_cur)]
            replacement_keys_cur = outputs_new[len(outputs_cur):
                                               len(outputs_cur) +
                                               len(replacement_keys_cur) - 1]
            replacement_vals_cur = outputs_new[len(outputs_cur) +
                                               len(replacement_keys_cur):]

        return ComputationGraph(outputs_cur)