Beispiel #1
0
    def do(self, which_callback, *args):
        """Dispatch a main-loop callback of the prediction generator.

        The behaviour depends on `which_callback`:

        * ``'before_training'``: deep-copies the trained recognizer and
          compiles, from the copy, a Theano function producing the
          predictions and their mask (plus the value targets when
          ``self.compute_targets`` is set and the policy probabilities
          when ``self.compute_policy`` is set). If
          ``self.catching_up_freq`` is truthy, a second function is
          compiled that moves every parameter of the copy towards the
          corresponding trained parameter.
        * ``'before_batch'``: runs the compiled generator on the batch
          and stores every output in the batch under the name of the
          output variable.
        * ``'after_batch'``: runs the catch-up function whenever the
          number of finished iterations is a multiple of
          ``self.catching_up_freq``.

        Parameters
        ----------
        which_callback : str
            Name of the callback being invoked.
        *args
            Callback arguments; for ``'before_batch'`` this must be
            exactly one item, the batch, indexable by variable name.

        Raises
        ------
        ValueError
            If `which_callback` is not one of the three supported
            callbacks, if ``self.catching_up_coof`` is neither a float
            nor a list, or if it is a list in which no pattern matches
            one of the parameter names.

        """
        if which_callback == 'before_training':
            logger.info("Compiling prediction generator...")
            recognizer, = self.main_loop.model.get_top_bricks()
            self.trained_recognizer = recognizer
            # Generation uses a separate copy, so its parameters only
            # change when the catch-up function below is run.
            self.recognizer = copy.deepcopy(recognizer)

            # A bit of defensive programming, because why not :)
            readout = self.recognizer.generator.readout
            assert readout.compute_targets
            assert readout.trpo_coef == 0.0
            assert readout.solve_bellman
            assert readout.epsilon == 0.0

            groundtruth = self.recognizer.labels
            groundtruth_mask = self.recognizer.labels_mask
            generated = self.recognizer.get_generate_graph(
                n_steps=self.recognizer.labels.shape[0] +
                self.extra_generation_steps,
                return_initial_states=True,
                use_softmax_t=True)
            generation_method = self.recognizer.generator.generate

            if not self.force_generate_groundtruth:
                # The samples are removed from `generated`, so they are
                # not passed to the readout costs a second time below.
                prediction = generated.pop('samples')
                prediction_mask = self.recognizer.mask_for_prediction(
                    prediction, groundtruth_mask, self.extra_generation_steps)
            else:
                prediction = groundtruth.copy()
                prediction_mask = groundtruth_mask.copy()
            prediction.name = 'predicted_labels'
            prediction_mask.name = 'predicted_mask'

            # Recover the inputs that were fed to the generation method;
            # the one-element unpacking checks that each is unique.
            cg = ComputationGraph(generated.values())
            attended, = VariableFilter(applications=[generation_method],
                                       name='attended')(cg)
            attended_mask, = VariableFilter(applications=[generation_method],
                                            name='attended_mask')(cg)
            # Drop the last step of every remaining generated output
            # (presumably the extra step that comes with the initial
            # states -- TODO confirm).
            generated = {key: value[:-1] for key, value in generated.items()}
            costs = readout.costs(
                prediction=prediction,
                prediction_mask=prediction_mask,
                groundtruth=groundtruth,
                groundtruth_mask=groundtruth_mask,
                attended=attended,
                attended_mask=attended_mask,
                **generated)
            cost_cg = ComputationGraph(costs)
            value_targets, = VariableFilter(name='value_targets')(cost_cg)
            value_targets.name = 'value_targets'
            probs, = VariableFilter(name='probs')(cost_cg)
            probs.name = 'probs'
            # Not used afterwards; the unpacking still checks that exactly
            # one variable named 'rewards' exists in the cost graph.
            rewards, = VariableFilter(name='rewards')(cost_cg)

            variables_to_compute = [prediction, prediction_mask]
            if self.compute_targets:
                logger.debug("Also compute the targets")
                variables_to_compute += [value_targets]
            if self.compute_policy:
                variables_to_compute += [probs]
            self.extended_cg = ComputationGraph(variables_to_compute)
            self._generate = self.extended_cg.get_theano_function()
            logger.info("Prediction generator compiled")

            params = Selector(self.recognizer).get_parameters()
            trained_params = Selector(self.trained_recognizer).get_parameters()
            if self.catching_up_freq:

                def get_coof(name):
                    """Return the catching up coefficient for `name`."""
                    coof_spec = self.catching_up_coof
                    if isinstance(coof_spec, float):
                        return coof_spec
                    if isinstance(coof_spec, list):
                        # Every pattern is tried; the last match wins.
                        result = None
                        for pattern, coof in coof_spec:
                            if re.match(pattern, name):
                                result = coof
                        if result is None:
                            # Fail here with a clear message instead of
                            # with a TypeError in the update expression.
                            raise ValueError(
                                "no catching up coefficient matches "
                                "parameter {}".format(name))
                        return result
                    raise ValueError(
                        "catching_up_coof must be a float or a list of "
                        "(pattern, coefficient) pairs, got {!r}".format(
                            coof_spec))

                # Keyed by parameter: this is needed when parameters are
                # shared between bricks and would otherwise occur more
                # than once in the list of updates.
                updates = {}
                for name, param in params.items():
                    coof = get_coof(name)
                    logger.debug(
                        "Catching up coefficient for {} is {}".format(
                            name, coof))
                    updates[param] = (param * (1 - coof) +
                                      trained_params[name] * coof)
                self._catch_up = theano.function(
                    [], [], updates=list(updates.items()))
        elif which_callback == 'before_batch':
            batch, = args
            inputs = [batch[variable.name]
                      for variable in self.extended_cg.inputs]
            generated = self._generate(*inputs)
            for variable, value in zip(self.extended_cg.outputs, generated):
                batch[variable.name] = value
        elif which_callback == 'after_batch':
            if (self.catching_up_freq
                    and self.main_loop.status['iterations_done'] %
                    self.catching_up_freq == 0):
                self._catch_up()
        else:
            raise ValueError("can't be called on " + which_callback)
Beispiel #2
0
from blocks.bricks.cost import CategoricalCrossEntropy, MisclassificationRate
# Categorical cross-entropy between the flattened targets `y` and the
# predictions `y_hat`.
# NOTE(review): `y` and `y_hat` are not defined in the visible part of this
# script; they have to exist before this line runs -- confirm where they
# come from.
cost = CategoricalCrossEntropy().apply(y.flatten(), y_hat)


from blocks.roles import WEIGHT
from blocks.graph import ComputationGraph
from blocks.filter import VariableFilter
# Build the computation graph of the cost and pull its weight variables out.
cg = ComputationGraph(cost)
# The two-name unpacking requires the filter to return exactly two
# variables with the WEIGHT role; any other count raises a ValueError.
W1, W2 = VariableFilter(roles=[WEIGHT])(cg.variables)
# Penalty coefficients, one per weight variable.
# NOTE(review): despite the names, both coefficients scale a sum of
# *squared* weights below -- confirm that no absolute-value (L1) penalty
# was intended.
L1,L2 = 0.05, 0.05
# Add the two weighted penalties to the cost and give the result a name.
cost = cost + L1 * (W1 ** 2).sum() + L2 * (W2 ** 2).sum()
cost.name = 'cost_with_regularization'

from blocks.bricks import MLP
# NOTE(review): `mlp` is bound to the value returned by `.apply(x)`, not to
# the MLP brick itself, so the brick cannot be reached through this name
# afterwards -- confirm this is intended. `Rectifier`, `Softmax`,
# `input_dim`, `num_hidden_nodes` and `x` are not defined or imported in
# the visible part of the script.
mlp = MLP(activations=[Rectifier(), Softmax()], dims=[input_dim,num_hidden_nodes, 2]).apply(x)
# Label the first weight variable obtained from the cost graph.
W1.name = 'W1'


from blocks.initialization import IsotropicGaussian, Constant
# Configure and run the parameter initialisation of both layers.
# NOTE(review): `hidden_to_output` and `input_to_hidden` are not defined in
# the visible part of the script -- confirm where they are created.
# The weight scheme object is created once and the very same object is then
# assigned to the other layer.
hidden_to_output.weights_init = IsotropicGaussian(0.01)
input_to_hidden.weights_init = hidden_to_output.weights_init
# Same sharing for the bias scheme: one Constant(0) object for both layers.
hidden_to_output.biases_init = Constant(0)
input_to_hidden.biases_init = hidden_to_output.biases_init
# Initialise the input layer first, then the output layer.
input_to_hidden.initialize()
hidden_to_output.initialize()


#TRAINING DATA
from fuel.streams import DataStream
from fuel.schemes import SequentialScheme
from fuel.transformers import Flatten