Esempio n. 1
0
    def construct_locator(self, locate_mlp_spec, n_spatial_dims, location_std,
                          scale_std, batch_normalize, **kwargs):
        """Build the bricks that map the attention state to glimpse parameters.

        Creates ``self.locate_mlp`` (an MLP whose input dimension is that of
        the state named by ``self.attention_state_name``) and
        ``self.theta_from_area`` (a linear map from the MLP output to
        ``2 * n_spatial_dims`` values), stores ``n_spatial_dims``,
        ``location_std``, ``scale_std`` and a seeded Theano random stream on
        ``self``, and appends both bricks to ``self.children``.

        Any additional keyword arguments are accepted and ignored.
        """
        self.n_spatial_dims = n_spatial_dims

        state_dim = self.get_dim(self.attention_state_name)
        self.locate_mlp = masonry.construct_mlp(
            name="locate_mlp",
            input_dim=state_dim,
            hidden_dims=locate_mlp_spec,
            weights_init=initialization.Orthogonal(),
            biases_init=initialization.Constant(0),
            batch_normalize=batch_normalize)

        # columns are normalized because the fan-in is large
        theta_weights_init = initialization.NormalizedInitialization(
            initialization.IsotropicGaussian())
        # location biases start at zero and scale biases at one, so the
        # model zooms in by default
        theta_biases = np.array(
            [0.] * n_spatial_dims + [1.] * n_spatial_dims)
        self.theta_from_area = bricks.Linear(
            name="theta_from_area",
            input_dim=self.locate_mlp.output_dim,
            output_dim=2 * n_spatial_dims,
            weights_init=theta_weights_init,
            biases_init=initialization.Constant(theta_biases))

        self.T_rng = theano.sandbox.rng_mrg.MRG_RandomStreams(12345)
        self.location_std = location_std
        self.scale_std = scale_std

        self.children.extend([self.locate_mlp, self.theta_from_area])
Esempio n. 2
0
    def __init__(self, hidden_dim, cropper, attention_state_name,
                 hyperparameters, **kwargs):
        """Assemble the recurrent attention model and its sub-bricks.

        Builds a stack of two LSTMs of width ``hidden_dim``, stores the
        cropper and the attention state name, delegates to
        ``construct_locator`` and ``construct_merger`` (both receive
        ``hyperparameters`` as keyword arguments), and creates the
        ``embedder`` linear brick that maps the response MLP output to
        ``4 * rnn.get_dim("states")`` units.  Remaining keyword arguments
        are forwarded to the parent constructor.
        """
        super(RecurrentAttentionModel, self).__init__(**kwargs)

        lstm_layers = [
            bricks.LSTM(activation=bricks.Tanh(), dim=hidden_dim)
            for _ in range(2)]
        rnn_weights_init = initialization.NormalizedInitialization(
            initialization.IsotropicGaussian())
        self.rnn = bricks.RecurrentStack(
            lstm_layers,
            weights_init=rnn_weights_init,
            biases_init=initialization.Constant(0))

        # the RNN state with this name determines the parameters of the
        # next glimpse
        self.attention_state_name = attention_state_name
        self.cropper = cropper

        # construct_merger must run before the embedder is built, since the
        # embedder reads self.response_mlp.output_dim
        self.construct_locator(**hyperparameters)
        self.construct_merger(**hyperparameters)

        embedding_dim = 4 * self.rnn.get_dim("states")
        self.embedder = bricks.Linear(
            name="embedder",
            input_dim=self.response_mlp.output_dim,
            output_dim=embedding_dim,
            use_bias=True,
            weights_init=initialization.Orthogonal(),
            biases_init=initialization.Constant(1))

        # don't let blocks touch my children
        self.initialization_config_pushed = True

        self.children.extend([self.rnn, self.cropper, self.embedder])

        # the state names only become known once the RNN exists
        self.apply.outputs = self.rnn.apply.outputs
        self.compute_initial_state.outputs = self.rnn.apply.outputs
Esempio n. 3
0
    def __init__(self, hidden_dim, cropper, attention_state_name,
                 hyperparameters, **kwargs):
        """Assemble the recurrent attention model and its sub-bricks.

        Starts with an empty ``self.children`` list, builds a stack of two
        LSTMs of width ``hidden_dim``, stores the cropper and the attention
        state name, delegates to ``construct_locator`` and
        ``construct_merger`` (both receive ``hyperparameters`` as keyword
        arguments), and creates the ``embedder`` linear brick that maps the
        response MLP output to ``rnn.get_dim("inputs")`` units.  Additional
        keyword arguments are accepted and not used here.
        """
        # we're no longer a brick, but everything still has to be
        # initialized, so the children are tracked by hand
        self.children = []

        lstm_layers = [
            bricks.LSTM(activation=bricks.Tanh(), dim=hidden_dim)
            for _ in range(2)]
        rnn_weights_init = initialization.NormalizedInitialization(
            initialization.IsotropicGaussian())
        self.rnn = bricks.RecurrentStack(
            lstm_layers,
            weights_init=rnn_weights_init,
            biases_init=initialization.Constant(0))

        # the RNN state with this name determines the parameters of the
        # next glimpse
        self.attention_state_name = attention_state_name
        self.cropper = cropper

        # construct_merger must run before the embedder is built, since the
        # embedder reads self.response_mlp.output_dim
        self.construct_locator(**hyperparameters)
        self.construct_merger(**hyperparameters)

        embedding_dim = self.rnn.get_dim("inputs")
        self.embedder = bricks.Linear(
            name="embedder",
            input_dim=self.response_mlp.output_dim,
            output_dim=embedding_dim,
            use_bias=True,
            weights_init=initialization.Orthogonal(),
            biases_init=initialization.Constant(0))

        self.children.extend([self.rnn, self.cropper, self.embedder])
Esempio n. 4
0
    def construct_merger(self, n_spatial_dims, n_channels, patch_shape,
                         response_dim, patch_cnn_spec, patch_mlp_spec,
                         merge_mlp_spec, response_mlp_spec, batch_normalize,
                         batch_normalize_patch, **kwargs):
        """Build the bricks that combine a patch with its glimpse parameters.

        Creates and stores on ``self``:

        * ``patch_transform`` -- an optional CNN (when ``patch_cnn_spec`` is
          truthy), a flattener, and an optional MLP (when ``patch_mlp_spec``
          is truthy), chained in that order;
        * ``merge_mlp`` -- an MLP over an input of ``2 * n_spatial_dims``
          values;
        * ``response_merge`` -- a ``Merge`` of the "area" and "patch"
          inputs into ``response_dim`` units, followed by
          ``response_merge_activation``;
        * ``response_mlp`` -- an MLP over the merged response.

        All five bricks are appended to ``self.children``.  Any additional
        keyword arguments are accepted and ignored.
        """
        # --- patch interpretation network ---
        stages = []
        if not patch_cnn_spec:
            # no CNN: the flattener sees the raw patch
            flat_input_shape = (n_channels,) + tuple(patch_shape)
        else:
            patch_cnn = masonry.construct_cnn(
                name="patch_cnn",
                layer_specs=patch_cnn_spec,
                input_shape=patch_shape,
                n_channels=n_channels,
                batch_normalize=batch_normalize_patch)
            stages.append(patch_cnn)
            flat_input_shape = stages[-1].get_dim("output")

        stages.append(
            bricks.FeedforwardFlattener(input_shape=flat_input_shape))

        if patch_mlp_spec:
            patch_mlp = masonry.construct_mlp(
                name="patch_mlp",
                hidden_dims=patch_mlp_spec,
                input_dim=stages[-1].output_dim,
                weights_init=initialization.Orthogonal(),
                biases_init=initialization.Constant(0),
                batch_normalize=batch_normalize_patch)
            stages.append(patch_mlp)

        stage_applications = [stage.apply for stage in stages]
        self.patch_transform = bricks.FeedforwardSequence(
            stage_applications, name="ffs")

        # --- theta interpretation network ---
        theta_dim = 2 * n_spatial_dims
        self.merge_mlp = masonry.construct_mlp(
            name="merge_mlp",
            input_dim=theta_dim,
            hidden_dims=merge_mlp_spec,
            weights_init=initialization.Orthogonal(),
            biases_init=initialization.Constant(0),
            batch_normalize=batch_normalize)

        # --- what-where merger network ---
        merge_prototype = bricks.Linear(
            use_bias=False,
            weights_init=initialization.Orthogonal(),
            biases_init=initialization.Constant(0))
        merge_input_dims = [self.merge_mlp.output_dim,
                            self.patch_transform.output_dim]
        self.response_merge = bricks.Merge(
            input_names="area patch".split(),
            input_dims=merge_input_dims,
            output_dim=response_dim,
            prototype=merge_prototype,
            child_prefix="response_merge")
        self.response_merge_activation = bricks.NormalizedActivation(
            shape=[response_dim],
            name="response_merge_activation",
            batch_normalize=batch_normalize)

        self.response_mlp = masonry.construct_mlp(
            name="response_mlp",
            hidden_dims=response_mlp_spec,
            input_dim=response_dim,
            weights_init=initialization.Orthogonal(),
            biases_init=initialization.Constant(0),
            batch_normalize=batch_normalize)

        new_children = [
            self.response_merge_activation,
            self.response_merge,
            self.patch_transform,
            self.merge_mlp,
            self.response_mlp,
        ]
        self.children.extend(new_children)