Example No. 1
def check_models(models):
    """Check if all models in the list are roughly the same."""
    layers_list = [get_all_layers(m) for m in models]
    n = len(layers_list[0])
    assert all(n == len(l) for l in layers_list)
    for layers in zip(*layers_list):
        first, *rest = layers
        assert all(check_layer(first, c) for c in rest)
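A minimal usage sketch for the helper above; build_network is a hypothetical constructor returning a Lasagne output layer, and check_layer is assumed to come from the same codebase:

model_a = build_network()  # hypothetical: returns the output layer of a network
model_b = build_network()
# Raises AssertionError if the graphs differ in depth or in any layer pair
check_models([model_a, model_b])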
Example No. 2
 def __init__(self, output_layer, description="", tags=None,
              predecessor_experiment=""):
     self.layers = get_all_layers(output_layer)
     self._deterministic_output_func = None
     self.train_iterations = 0
     self.description = description
     self.tags = none_to_list(tags)
     for tag in self.tags:
         if tag not in VALID_TAGS:
             raise ValueError("{} is not a valid tag!".format(tag))
     self.predecessor_experiment = predecessor_experiment
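A hedged usage sketch for this constructor; the class name Experiment, the build_network helper, and the tag value are assumptions, not part of the snippet:

network = build_network()                  # hypothetical output layer
exp = Experiment(network,
                 description="baseline run",
                 tags=["cnn"],             # must appear in VALID_TAGS
                 predecessor_experiment="exp-001")
print(len(exp.layers), "layers collected by get_all_layers")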
Example No. 3
 def __init__(self,
              output_layer,
              description="",
              tags=None,
              predecessor_experiment=""):
     self.layers = get_all_layers(output_layer)
     self._deterministic_output_func = None
     self.train_iterations = 0
     self.description = description
     self.tags = none_to_list(tags)
     for tag in self.tags:
         if tag not in VALID_TAGS:
             raise ValueError("{} is not a valid tag!".format(tag))
     self.predecessor_experiment = predecessor_experiment
Example No. 4
    def __init__(self, *args, **kwargs):
        super(TrainerMixin, self).__init__(*args, **kwargs)
        input_var = tensor.tensor4('inputs')
        target_var = tensor.ivector('targets')

        loss, _ = loss_acc(self.model,
                           input_var,
                           target_var,
                           deterministic=False)
        layers = get_all_layers(self.model)
        decay = regularize_layer_params(layers, l2) * 0.0001
        loss = loss + decay

        params = get_all_params(self.model, trainable=True)
        updates = momentum(loss,
                           params,
                           momentum=0.9,
                           learning_rate=self.learning_rate)
        self.set_training(input_var, target_var, loss, updates)
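The weight-decay and update pattern from the mixin above, shown standalone as a sketch; network is a hypothetical Lasagne output layer, loss an existing scalar Theano expression, and the 0.0001 coefficient is taken from the snippet:

from lasagne.layers import get_all_layers, get_all_params
from lasagne.regularization import regularize_layer_params, l2
from lasagne.updates import momentum

layers = get_all_layers(network)
loss = loss + regularize_layer_params(layers, l2) * 0.0001  # L2 weight decay
params = get_all_params(network, trainable=True)
updates = momentum(loss, params, learning_rate=0.01, momentum=0.9)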
Example No. 5
    def fit(self, X, X_valid=None):
        m = 1

        # define the model
        x_in = layers.InputLayer((None, X.shape[1]))
        hid = x_in

        # Encoder: halve the number of units at each successive layer
        for i in range(self.nb_layers):
            hid = layers.DenseLayer(hid, num_units=self.nb_units * m // (2 ** i),
                                    nonlinearity=nonlinearities.sigmoid)
        # Decoder: mirror the encoder back up to the input width
        for i in range(self.nb_layers - 1):
            k = self.nb_layers - 2 - i
            hid = layers.DenseLayer(hid, num_units=self.nb_units * m // (2 ** k),
                                    nonlinearity=nonlinearities.sigmoid)
        o = layers.DenseLayer(hid, num_units=X.shape[1],
                              nonlinearity=nonlinearities.sigmoid)
        model = LightweightModel([x_in], [o])

        all_layers = get_all_layers(o)
        self.all_layers = all_layers
        rng = rng_mrg.MRG_RandomStreams()

        def get_reconstruction_error(model, X, x_hat=None):
            if x_hat is None:
                x_hat, = model.get_output(X)

            return (-(X * T.log(x_hat) +
                    (1 - X) * T.log(1 - x_hat)).sum(axis=1).mean())

        def loss_function(model, tensors):
            X = tensors["X"]
            X_noisy = X
            #X_noisy = X * (rng.uniform(X.shape) < (1 - self.corruption))
            #if self.corruption_type == "masking_noise":
            #    X_noisy = corrupted_masking_noise(rng, X, self.corruption)
            #elif self.corruption_type == "salt_and_pepper":
            #    X_noisy = corrupted_salt_and_pepper(rng, X, self.corruption)
            x_hat, = model.get_output(X_noisy)
            # l1 = 0.01 * sum(T.abs_(layer.W).sum() for layer in all_layers[1:-1])
            l1 = 0
            diversity = 0
            return get_reconstruction_error(model, X, x_hat) + diversity + l1

        input_variables = dict(
            X=dict(tensor_type=T.matrix),
        )

        functions = dict(
            predict=dict(
                get_output=lambda model, X: model.get_output(X)[0],
                params=["X"]
            ),
            get_reconstruction_error=dict(
                get_output=get_reconstruction_error,
                params=["X"]
            )
        )

        # Register one output-retrieval function per hidden layer
        for i, layer in enumerate(all_layers[1:-1]):
            functions["get_layer_{0}".format(i + 1)] = dict(
                get_output=lambda model, X: model.get_output(X)[0],
                params=["X"])

        class MyBatchOptimizer(BatchOptimizer):

            def iter_update(self, epoch, nb_batches, iter_update_batch):
                status = super(MyBatchOptimizer, self).iter_update(
                    epoch, nb_batches, iter_update_batch)
                status["reconstruction_error_train"] = \
                    capsule.get_reconstruction_error(X[0:100])
                if X_valid is not None:
                    status["reconstruction_error_valid"] = \
                        capsule.get_reconstruction_error(X_valid[0:100])
                # Track per-layer activation statistics for monitoring
                for i, layer in enumerate(all_layers[1:-1]):
                    getter = getattr(capsule, "get_layer_{0}".format(i + 1))
                    activations = getter(X)
                    status["activations_{0}_mean".format(i + 1)] = activations.mean()
                    status["activations_{0}_std".format(i + 1)] = activations.std()
                return status

        batch_optimizer = MyBatchOptimizer(
            verbose=1,
            max_nb_epochs=self.max_epochs,
            batch_size=self.batch_size,
            optimization_procedure=(
                   updates.adagrad,
                   {"learning_rate": self.learning_rate}
            ),
            whole_dataset_in_device=True
        )
        capsule = Capsule(
            input_variables, model,
            loss_function,
            functions=functions,
            batch_optimizer=batch_optimizer,
        )

        capsule.fit(X=X)
        self.capsule = capsule
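A hedged usage sketch for the estimator above; the class name DenseAutoencoder and its constructor arguments are assumptions inferred from the attributes the fit method reads (nb_layers, nb_units, learning_rate, batch_size, max_epochs), and calling capsule.predict mirrors how iter_update calls the registered get_reconstruction_error:

ae = DenseAutoencoder(nb_layers=2, nb_units=256, learning_rate=0.01,
                      batch_size=128, max_epochs=50)
ae.fit(X_train, X_valid=X_valid)       # X_train/X_valid: 2-D numpy arrays
X_hat = ae.capsule.predict(X_train)    # registered "predict" function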
Example No. 6
    def __init__(self, incoming, input_to_hidden, hidden_to_hidden,
                 nonlinearity=nonlinearities.rectify,
                 hid_init=init.Constant(0.),
                 backwards=False,
                 learn_init=False,
                 gradient_steps=-1,
                 grad_clipping=0,
                 unroll_scan=False,
                 precompute_input=True,
                 mask_input=None,
                 only_return_final=False,
                 gamma=0.9,
                 **kwargs):

        # This layer inherits from a MergeLayer, because it can have three
        # inputs - the layer input, the mask and the initial hidden state.  We
        # will just provide the layer input as incomings, unless a mask input
        # or initial hidden state was provided.
        incomings = [incoming]
        self.mask_incoming_index = -1
        self.hid_init_incoming_index = -1
        if mask_input is not None:
            incomings.append(mask_input)
            self.mask_incoming_index = len(incomings)-1
        if isinstance(hid_init, Layer):
            incomings.append(hid_init)
            self.hid_init_incoming_index = len(incomings)-1

        super(CustomRecurrentLayerWithFastWeights, self).__init__(
            incomings, **kwargs)

        input_to_hidden_in_layers = \
            [layer for layer in helper.get_all_layers(input_to_hidden)
             if isinstance(layer, InputLayer)]
        if len(input_to_hidden_in_layers) != 1:
            raise ValueError(
                '`input_to_hidden` must have exactly one InputLayer, but it '
                'has {}'.format(len(input_to_hidden_in_layers)))

        hidden_to_hidden_in_lyrs = \
            [layer for layer in helper.get_all_layers(hidden_to_hidden)
             if isinstance(layer, InputLayer)]
        if len(hidden_to_hidden_in_lyrs) != 1:
            raise ValueError(
                '`hidden_to_hidden` must have exactly one InputLayer, but it '
                'has {}'.format(len(hidden_to_hidden_in_lyrs)))
        hidden_to_hidden_in_layer = hidden_to_hidden_in_lyrs[0]

        self.input_to_hidden = input_to_hidden
        self.hidden_to_hidden = hidden_to_hidden
        self.learn_init = learn_init
        self.backwards = backwards
        self.gradient_steps = gradient_steps
        self.grad_clipping = grad_clipping
        self.unroll_scan = unroll_scan
        self.precompute_input = precompute_input
        self.only_return_final = only_return_final
        self.gamma = gamma

        if unroll_scan and gradient_steps != -1:
            raise ValueError(
                "Gradient steps must be -1 when unroll_scan is true.")

        # Retrieve the dimensionality of the incoming layer
        input_shape = self.input_shapes[0]

        if unroll_scan and input_shape[1] is None:
            raise ValueError("Input sequence length cannot be specified as "
                             "None when unroll_scan is True")

        # Check that the input_to_hidden connection can appropriately handle
        # a first dimension of input_shape[0]*input_shape[1] when we will
        # precompute the input dot product
        if (self.precompute_input and
                input_to_hidden.output_shape[0] is not None and
                input_shape[0] is not None and
                input_shape[1] is not None and
                (input_to_hidden.output_shape[0] !=
                 input_shape[0]*input_shape[1])):
            raise ValueError(
                'When precompute_input == True, '
                'input_to_hidden.output_shape[0] must equal '
                'incoming.output_shape[0]*incoming.output_shape[1] '
                '(i.e. batch_size*sequence_length) or be None but '
                'input_to_hidden.output_shape[0] = {} and '
                'incoming.output_shape[0]*incoming.output_shape[1] = '
                '{}'.format(input_to_hidden.output_shape[0],
                            input_shape[0]*input_shape[1]))

        # Check that the first dimension of input_to_hidden and
        # hidden_to_hidden's outputs match when we won't precompute the input
        # dot product
        if (not self.precompute_input and
                input_to_hidden.output_shape[0] is not None and
                hidden_to_hidden.output_shape[0] is not None and
                (input_to_hidden.output_shape[0] !=
                 hidden_to_hidden.output_shape[0])):
            raise ValueError(
                'When precompute_input == False, '
                'input_to_hidden.output_shape[0] must equal '
                'hidden_to_hidden.output_shape[0] but '
                'input_to_hidden.output_shape[0] = {} and '
                'hidden_to_hidden.output_shape[0] = {}'.format(
                    input_to_hidden.output_shape[0],
                    hidden_to_hidden.output_shape[0]))

        # Check that input_to_hidden and hidden_to_hidden output shapes match,
        # but don't check a dimension if it's None for either shape
        if not all(s1 is None or s2 is None or s1 == s2
                   for s1, s2 in zip(input_to_hidden.output_shape[1:],
                                     hidden_to_hidden.output_shape[1:])):
            raise ValueError("The output shape for input_to_hidden and "
                             "hidden_to_hidden must be equal after the first "
                             "dimension, but input_to_hidden.output_shape={} "
                             "and hidden_to_hidden.output_shape={}".format(
                                 input_to_hidden.output_shape,
                                 hidden_to_hidden.output_shape))

        # Check that input_to_hidden's output shape is the same as
        # hidden_to_hidden's input shape but don't check a dimension if it's
        # None for either shape
        h_to_h_input_shape = hidden_to_hidden_in_layer.output_shape
        if not all(s1 is None or s2 is None or s1 == s2
                   for s1, s2 in zip(input_to_hidden.output_shape[1:],
                                     h_to_h_input_shape[1:])):
            raise ValueError(
                "The output shape for input_to_hidden must be equal to the "
                "input shape of hidden_to_hidden after the first dimension, "
                "but input_to_hidden.output_shape={} and "
                "hidden_to_hidden:input_layer.shape={}".format(
                    input_to_hidden.output_shape, h_to_h_input_shape))

        if nonlinearity is None:
            self.nonlinearity = nonlinearities.identity
        else:
            self.nonlinearity = nonlinearity

        # Initialize hidden state
        if isinstance(hid_init, Layer):
            self.hid_init = hid_init
        else:
            self.hid_init = self.add_param(
                hid_init, (1,) + hidden_to_hidden.output_shape[1:],
                name="hid_init", trainable=learn_init, regularizable=False)
Example No. 7
def load_random_streams(model, path):
    """Load the random streams from a file into a model."""
    layers = [l for l in get_all_layers(model) if hasattr(l, '_srng')]
    with numpy.load(path) as fobj:
        for i, layer in enumerate(layers):
            layer._srng = RandomStreams(fobj[f'seed_{i}'].item())
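A sketch of the file layout this loader expects: one integer seed per layer that carries an `_srng` attribute, stored under keys seed_0, seed_1, and so on. The seed values and the file path below are purely illustrative:

import numpy

seeds = {f'seed_{i}': s for i, s in enumerate([42, 7, 1234])}
numpy.savez('streams.npz', **seeds)         # illustrative path
load_random_streams(model, 'streams.npz')   # model: Lasagne output layer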
Example No. 8
def get_random_streams(model):
    """Return a list with all ``RandomStreams`` in the model."""
    return [l._srng for l in get_all_layers(model) if hasattr(l, '_srng')]
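A hedged usage sketch: reseeding every stream the helper above returns, assuming Theano's RandomStreams.seed() as the reseeding call:

for i, srng in enumerate(get_random_streams(model)):
    srng.seed(1000 + i)    # re-initialize each layer's stream deterministically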
Example No. 9
    def __init__(self, incoming, input_to_hidden, hidden_to_hidden,
                 nonlinearity=nonlinearities.rectify,
                 hid_init=init.Constant(0.),
                 backwards=False,
                 learn_init=False,
                 gradient_steps=-1,
                 grad_clipping=0,
                 unroll_scan=False,
                 precompute_input=True,
                 mask_input=None,
                 only_return_final=False,
                 **kwargs):

        # This layer inherits from a MergeLayer, because it can have three
        # inputs - the layer input, the mask and the initial hidden state.  We
        # will just provide the layer input as incomings, unless a mask input
        # or initial hidden state was provided.
        incomings = [incoming]
        self.mask_incoming_index = -1
        self.hid_init_incoming_index = -1
        if mask_input is not None:
            incomings.append(mask_input)
            self.mask_incoming_index = len(incomings)-1
        if isinstance(hid_init, Layer):
            incomings.append(hid_init)
            self.hid_init_incoming_index = len(incomings)-1

        super(onlyRecurrentLayer, self).__init__(incomings, **kwargs)

        input_to_hidden_in_layers = \
            [layer for layer in helper.get_all_layers(input_to_hidden)
             if isinstance(layer, InputLayer)]
        if len(input_to_hidden_in_layers) != 1:
            raise ValueError(
                '`input_to_hidden` must have exactly one InputLayer, but it '
                'has {}'.format(len(input_to_hidden_in_layers)))

        hidden_to_hidden_in_lyrs = \
            [layer for layer in helper.get_all_layers(hidden_to_hidden)
             if isinstance(layer, InputLayer)]
        if len(hidden_to_hidden_in_lyrs) != 1:
            raise ValueError(
                '`hidden_to_hidden` must have exactly one InputLayer, but it '
                'has {}'.format(len(hidden_to_hidden_in_lyrs)))
        hidden_to_hidden_in_layer = hidden_to_hidden_in_lyrs[0]

        self.input_to_hidden = input_to_hidden
        self.hidden_to_hidden = hidden_to_hidden
        self.learn_init = learn_init
        self.backwards = backwards
        self.gradient_steps = gradient_steps
        self.grad_clipping = grad_clipping
        self.unroll_scan = unroll_scan
        self.precompute_input = precompute_input
        self.only_return_final = only_return_final

        if unroll_scan and gradient_steps != -1:
            raise ValueError(
                "Gradient steps must be -1 when unroll_scan is true.")

        # Retrieve the dimensionality of the incoming layer
        input_shape = self.input_shapes[0]

        if nonlinearity is None:
            self.nonlinearity = nonlinearities.identity
        else:
            self.nonlinearity = nonlinearity

        # Initialize hidden state
        if isinstance(hid_init, Layer):
            self.hid_init = hid_init
        else:
            self.hid_init = self.add_param(
                hid_init, (1,) + hidden_to_hidden.output_shape[1:],
                name="hid_init", trainable=learn_init, regularizable=False)