def construct_locator(self, locate_mlp_spec, n_spatial_dims, location_std,
                      scale_std, batch_normalize, **kwargs):
    self.n_spatial_dims = n_spatial_dims
    self.locate_mlp = masonry.construct_mlp(
        name="locate_mlp",
        input_dim=self.get_dim(self.attention_state_name),
        hidden_dims=locate_mlp_spec,
        weights_init=initialization.Orthogonal(),
        biases_init=initialization.Constant(0),
        batch_normalize=batch_normalize)
    self.theta_from_area = bricks.Linear(
        input_dim=self.locate_mlp.output_dim,
        output_dim=2*n_spatial_dims,
        name="theta_from_area",
        # normalize columns because the fan-in is large
        weights_init=initialization.NormalizedInitialization(
            initialization.IsotropicGaussian()),
        # initialize location biases to zero and scale biases to one
        # so the model will zoom in by default
        biases_init=initialization.Constant(np.array(
            [0.] * n_spatial_dims + [1.] * n_spatial_dims)))
    self.T_rng = theano.sandbox.rng_mrg.MRG_RandomStreams(12345)
    self.location_std = location_std
    self.scale_std = scale_std
    self.children.extend([self.locate_mlp, self.theta_from_area])
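# The snippet below is a minimal illustrative sketch, not part of the original
# source: it shows how the locator pieces constructed above (locate_mlp,
# theta_from_area, T_rng, location_std, scale_std) could map an RNN state to
# noisy glimpse parameters. The method name `locate`, the slicing convention,
# and the exploration-noise scheme are assumptions.
def locate(self, h):
    area = self.locate_mlp.apply(h)
    theta = self.theta_from_area.apply(area)
    # assume the first n_spatial_dims entries are locations, the rest scales
    location = theta[:, :self.n_spatial_dims]
    scale = theta[:, self.n_spatial_dims:]
    # inject exploration noise with the configured standard deviations
    location += self.T_rng.normal(location.shape, std=self.location_std)
    scale += self.T_rng.normal(scale.shape, std=self.scale_std)
    return location, scale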
def __init__(self, hidden_dim, cropper, attention_state_name,
             hyperparameters, **kwargs):
    super(RecurrentAttentionModel, self).__init__(**kwargs)

    self.rnn = bricks.RecurrentStack(
        [bricks.LSTM(activation=bricks.Tanh(), dim=hidden_dim),
         bricks.LSTM(activation=bricks.Tanh(), dim=hidden_dim)],
        weights_init=initialization.NormalizedInitialization(
            initialization.IsotropicGaussian()),
        biases_init=initialization.Constant(0))

    # name of the RNN state that determines the parameters of the next glimpse
    self.attention_state_name = attention_state_name

    self.cropper = cropper

    self.construct_locator(**hyperparameters)
    self.construct_merger(**hyperparameters)

    self.embedder = bricks.Linear(
        name="embedder",
        input_dim=self.response_mlp.output_dim,
        output_dim=4*self.rnn.get_dim("states"),
        use_bias=True,
        weights_init=initialization.Orthogonal(),
        biases_init=initialization.Constant(1))

    # don't let blocks touch my children
    self.initialization_config_pushed = True

    self.children.extend([self.rnn, self.cropper, self.embedder])

    # states aren't known until now
    self.apply.outputs = self.rnn.apply.outputs
    self.compute_initial_state.outputs = self.rnn.apply.outputs
def __init__(self, hidden_dim, cropper, attention_state_name,
             hyperparameters, **kwargs):
    # we're no longer a brick, but we still need to make sure we
    # initialize everything
    self.children = []

    self.rnn = bricks.RecurrentStack(
        [bricks.LSTM(activation=bricks.Tanh(), dim=hidden_dim),
         bricks.LSTM(activation=bricks.Tanh(), dim=hidden_dim)],
        weights_init=initialization.NormalizedInitialization(
            initialization.IsotropicGaussian()),
        biases_init=initialization.Constant(0))

    # name of the RNN state that determines the parameters of the next glimpse
    self.attention_state_name = attention_state_name

    self.cropper = cropper

    self.construct_locator(**hyperparameters)
    self.construct_merger(**hyperparameters)

    self.embedder = bricks.Linear(
        name="embedder",
        input_dim=self.response_mlp.output_dim,
        output_dim=self.rnn.get_dim("inputs"),
        use_bias=True,
        weights_init=initialization.Orthogonal(),
        biases_init=initialization.Constant(0))

    self.children.extend([self.rnn, self.cropper, self.embedder])
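# A hypothetical instantiation sketch, not from the original source: every
# value below is a placeholder, `cropper` is assumed to be constructed
# elsewhere, and the state name "states#1" assumes the RecurrentStack
# convention of suffixing upper-layer state names with "#<level>". The keys
# of `hyperparameters` follow the signatures of construct_locator and
# construct_merger above.
hyperparameters = dict(
    n_spatial_dims=2, n_channels=1, patch_shape=(16, 16),
    patch_cnn_spec=None, patch_mlp_spec=[128],
    merge_mlp_spec=[32], response_mlp_spec=[128], response_dim=128,
    locate_mlp_spec=[32], location_std=0.1, scale_std=0.1,
    batch_normalize=False, batch_normalize_patch=False)
ram = RecurrentAttentionModel(
    hidden_dim=256,
    cropper=cropper,
    attention_state_name="states#1",
    hyperparameters=hyperparameters)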
def construct_merger(self, n_spatial_dims, n_channels, patch_shape,
                     response_dim, patch_cnn_spec, patch_mlp_spec,
                     merge_mlp_spec, response_mlp_spec, batch_normalize,
                     batch_normalize_patch, **kwargs):
    # construct patch interpretation network
    patch_transforms = []
    if patch_cnn_spec:
        patch_transforms.append(masonry.construct_cnn(
            name="patch_cnn",
            layer_specs=patch_cnn_spec,
            input_shape=patch_shape,
            n_channels=n_channels,
            batch_normalize=batch_normalize_patch))
        shape = patch_transforms[-1].get_dim("output")
    else:
        shape = (n_channels,) + tuple(patch_shape)
    patch_transforms.append(bricks.FeedforwardFlattener(input_shape=shape))
    if patch_mlp_spec:
        patch_transforms.append(masonry.construct_mlp(
            name="patch_mlp",
            hidden_dims=patch_mlp_spec,
            input_dim=patch_transforms[-1].output_dim,
            weights_init=initialization.Orthogonal(),
            biases_init=initialization.Constant(0),
            batch_normalize=batch_normalize_patch))
    self.patch_transform = bricks.FeedforwardSequence(
        [brick.apply for brick in patch_transforms], name="ffs")

    # construct theta interpretation network
    self.merge_mlp = masonry.construct_mlp(
        name="merge_mlp",
        input_dim=2*n_spatial_dims,
        hidden_dims=merge_mlp_spec,
        weights_init=initialization.Orthogonal(),
        biases_init=initialization.Constant(0),
        batch_normalize=batch_normalize)

    # construct what-where merger network
    self.response_merge = bricks.Merge(
        input_names="area patch".split(),
        input_dims=[self.merge_mlp.output_dim,
                    self.patch_transform.output_dim],
        output_dim=response_dim,
        prototype=bricks.Linear(
            use_bias=False,
            weights_init=initialization.Orthogonal(),
            biases_init=initialization.Constant(0)),
        child_prefix="response_merge")
    self.response_merge_activation = bricks.NormalizedActivation(
        shape=[response_dim],
        name="response_merge_activation",
        batch_normalize=batch_normalize)
    self.response_mlp = masonry.construct_mlp(
        name="response_mlp",
        hidden_dims=response_mlp_spec,
        input_dim=response_dim,
        weights_init=initialization.Orthogonal(),
        biases_init=initialization.Constant(0),
        batch_normalize=batch_normalize)

    self.children.extend([
        self.response_merge_activation,
        self.response_merge,
        self.patch_transform,
        self.merge_mlp,
        self.response_mlp])
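# A minimal sketch, not in the original source, of how the "what" stream (the
# cropped patch) and the "where" stream (the glimpse parameters) could be
# combined by the bricks constructed above. The method name `merge`, the
# argument layout, and the use of theano.tensor.concatenate are assumptions.
def merge(self, patch, location, scale):
    what = self.patch_transform.apply(patch)
    # merge_mlp expects the 2*n_spatial_dims glimpse parameters
    where = self.merge_mlp.apply(
        theano.tensor.concatenate([location, scale], axis=1))
    # positional order matches input_names: area, then patch
    response = self.response_merge.apply(where, what)
    response = self.response_merge_activation.apply(response)
    return self.response_mlp.apply(response)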