Example #1
    def construct_merger(self, n_spatial_dims, n_channels, patch_shape,
                         patch_cnn_spec, patch_mlp_spec, merge_mlp_spec,
                         response_mlp_spec, batch_normalize,
                         batch_normalize_patch, task_name, hyperparameters,
                         **kwargs):
        # construct patch interpretation network
        patch_transforms = []
        if task_name == "featurelevel_ucf101":
            n_channels = 512 + 4096
            shape = self.cropper.output_shape
        else:
            if patch_cnn_spec == "pretrained":
                import pretrained
                patch_transforms.append(
                    pretrained.get_patch_transform(**hyperparameters))
                shape = patch_transforms[-1].get_dim("output")
            elif patch_cnn_spec:
                patch_transforms.append(
                    masonry.construct_cnn(
                        name="patch_cnn",
                        layer_specs=patch_cnn_spec,
                        input_shape=patch_shape,
                        n_channels=n_channels,
                        batch_normalize=batch_normalize_patch))
                shape = patch_transforms[-1].get_dim("output")
            else:
                # fall back to the raw patch shape (cf. Example #10);
                # otherwise `shape` is unbound when no patch CNN is given
                shape = (n_channels,) + tuple(patch_shape)
        patch_transforms.append(bricks.FeedforwardFlattener(input_shape=shape))
        if patch_mlp_spec:
            patch_transforms.append(
                masonry.construct_mlp(
                    name="patch_mlp",
                    hidden_dims=patch_mlp_spec,
                    input_dim=patch_transforms[-1].output_dim,
                    weights_init=initialization.Orthogonal(),
                    biases_init=initialization.Constant(0),
                    batch_normalize=batch_normalize_patch))
        self.patch_transform = bricks.FeedforwardSequence(
            [brick.apply for brick in patch_transforms], name="ffs")

        # construct theta interpretation network
        self.merge_mlp = masonry.construct_mlp(
            name="merge_mlp",
            input_dim=2 * n_spatial_dims,
            hidden_dims=merge_mlp_spec,
            weights_init=initialization.Orthogonal(),
            biases_init=initialization.Constant(0),
            batch_normalize=batch_normalize)

        self.response_mlp = masonry.construct_mlp(
            name="response_mlp",
            hidden_dims=response_mlp_spec,
            input_dim=self.patch_transform.output_dim +
            self.merge_mlp.output_dim,
            weights_init=initialization.Orthogonal(),
            biases_init=initialization.Constant(0),
            batch_normalize=batch_normalize)

        self.children.extend(
            [self.patch_transform, self.merge_mlp, self.response_mlp])
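
For orientation, here is a sketch of the kind of hyperparameters dict that
construct_merger above consumes. Only the key names are grounded in the
signature; the values, and the layer-spec format (which follows
construct_cnn_layer in Example #6), are illustrative assumptions.

# hypothetical values; key names mirror the keyword signature above
hyperparameters = dict(
    n_spatial_dims=2,                 # 2 for images, 3 for video
    n_channels=3,
    patch_shape=(24, 24),
    patch_cnn_spec=[dict(type="conv", size=(3, 3), num_filters=32)],
    patch_mlp_spec=[256],
    merge_mlp_spec=[256],
    response_mlp_spec=[512],
    batch_normalize=True,
    batch_normalize_patch=True,
    task_name="mnist",                # anything but "featurelevel_ucf101"
)
# the signature also expects a "hyperparameters" entry (plausibly the dict
# itself), which only the patch_cnn_spec == "pretrained" path consumes
hyperparameters["hyperparameters"] = hyperparameters
model.construct_merger(**hyperparameters)  # model: a hypothetical instance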
Example #2
    def __init__(self, hidden_dim, cropper,
                 attention_state_name, hyperparameters, **kwargs):
        super(RecurrentAttentionModel, self).__init__(**kwargs)

        self.rnn = bricks.RecurrentStack(
            [bricks.LSTM(activation=bricks.Tanh(), dim=hidden_dim),
             bricks.LSTM(activation=bricks.Tanh(), dim=hidden_dim)],
            weights_init=initialization.NormalizedInitialization(
                initialization.IsotropicGaussian()),
            biases_init=initialization.Constant(0))

        # name of the RNN state that determines the parameters of the next glimpse
        self.attention_state_name = attention_state_name

        self.cropper = cropper
        self.construct_locator(**hyperparameters)
        self.construct_merger(**hyperparameters)

        self.embedder = bricks.Linear(
            name="embedder",
            input_dim=self.response_mlp.output_dim,
            output_dim=4*self.rnn.get_dim("states"),
            use_bias=True,
            weights_init=initialization.Orthogonal(),
            biases_init=initialization.Constant(1))
        
        # don't let blocks touch my children
        self.initialization_config_pushed = True

        self.children.extend([self.rnn, self.cropper, self.embedder])

        # states aren't known until now
        self.apply.outputs = self.rnn.apply.outputs
        self.compute_initial_state.outputs = self.rnn.apply.outputs
Example #3
    def construct_locator(self, locate_mlp_spec, n_spatial_dims,
                          location_std, scale_std, batch_normalize,
                          **kwargs):
        self.n_spatial_dims = n_spatial_dims

        self.locate_mlp = masonry.construct_mlp(
            name="locate_mlp",
            input_dim=self.get_dim(self.attention_state_name),
            hidden_dims=locate_mlp_spec,
            weights_init=initialization.Orthogonal(),
            biases_init=initialization.Constant(0),
            batch_normalize=batch_normalize)
        self.theta_from_area = bricks.Linear(
            input_dim=self.locate_mlp.output_dim,
            output_dim=2*n_spatial_dims,
            name="theta_from_area",
            # normalize columns because the fan-in is large
            weights_init=initialization.NormalizedInitialization(
                initialization.IsotropicGaussian()),
            # initialize location biases to zero and scale biases to one
            # so the model will zoom in by default
            biases_init=initialization.Constant(np.array(
                [0.] * n_spatial_dims + [1.] * n_spatial_dims)))

        self.T_rng = theano.sandbox.rng_mrg.MRG_RandomStreams(12345)
        self.location_std = location_std
        self.scale_std = scale_std

        self.children.extend([
            self.locate_mlp,
            self.theta_from_area])
Example #4
def construct_cnn(name, layer_specs, n_channels, input_shape, batch_normalize):
    ndim = len(input_shape)
    conv_module = {
        2: conv2d,
        3: conv3d,
    }[ndim]
    cnn = conv_module.ConvolutionalSequence(
        name=name,
        layers=[
            construct_cnn_layer(name="%s_%i" % (name, i),
                                layer_spec=layer_spec,
                                ndim=ndim,
                                conv_module=conv_module,
                                batch_normalize=batch_normalize)
            for i, layer_spec in enumerate(layer_specs)
        ],
        num_channels=n_channels,
        image_size=tuple(input_shape),
        weights_init=initialization.ConvolutionalInitialization(
            initialization.Orthogonal()),
        # our activation function will handle the bias
        use_bias=False)
    # ensure output dim is determined
    cnn.push_allocation_config()
    # tell the activations what shapes they'll be dealing with
    for layer in cnn.layers:
        activation = util.get_conv_activation(layer, conv_module)
        assert isinstance(activation, bricks.NormalizedActivation)
        activation.shape = layer.get_dim("output")
        activation.broadcastable = [False] + ndim * [True]
    cnn.initialize()
    return cnn
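
A hypothetical call, assuming the layer_spec dict format consumed by
construct_cnn_layer in Example #6; all names and sizes are made up.

cnn = construct_cnn(
    name="patch_cnn",
    layer_specs=[
        dict(type="conv", size=(5, 5), num_filters=16),
        dict(type="pool", size=(2, 2), step=(2, 2)),
        dict(type="conv", size=(3, 3), num_filters=32),
    ],
    n_channels=3,
    input_shape=(32, 32),    # two spatial dims select the conv2d module
    batch_normalize=True)
print(cnn.get_dim("output"))  # (channels, height, width) after the last layer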
Example #5
    def __init__(self, hidden_dim, cropper, attention_state_name,
                 hyperparameters, **kwargs):
        # we're no longer a brick, but we still need to make sure we
        # initialize everything
        self.children = []

        self.rnn = bricks.RecurrentStack(
            [
                bricks.LSTM(activation=bricks.Tanh(), dim=hidden_dim),
                bricks.LSTM(activation=bricks.Tanh(), dim=hidden_dim)
            ],
            weights_init=initialization.NormalizedInitialization(
                initialization.IsotropicGaussian()),
            biases_init=initialization.Constant(0))

        # name of the RNN state that determines the parameters of the next glimpse
        self.attention_state_name = attention_state_name

        self.cropper = cropper
        self.construct_locator(**hyperparameters)
        self.construct_merger(**hyperparameters)

        self.embedder = bricks.Linear(name="embedder",
                                      input_dim=self.response_mlp.output_dim,
                                      output_dim=self.rnn.get_dim("inputs"),
                                      use_bias=True,
                                      weights_init=initialization.Orthogonal(),
                                      biases_init=initialization.Constant(0))

        self.children.extend([self.rnn, self.cropper, self.embedder])
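
Note the contrast with Example #2: there the embedder's output_dim is
4*self.rnn.get_dim("states") with biases initialized to one, whereas here it
is self.rnn.get_dim("inputs") with zero biases. Assuming the stack forwards
get_dim to the underlying Blocks LSTM, whose "inputs" dimension is four times
its state dimension, the two sizes coincide and the variants differ only in
bias initialization.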
Example #6
def construct_cnn_layer(name, layer_spec, conv_module, ndim, batch_normalize):
    type_ = layer_spec.pop("type", "conv")
    if type_ == "pool":
        layer = conv_module.MaxPooling(
            name=name,
            pooling_size=layer_spec.pop("size", (1, ) * ndim),
            step=layer_spec.pop("step", (1, ) * ndim))
    elif type_ == "conv":
        border_mode = layer_spec.pop("border_mode", (0, ) * ndim)
        if not isinstance(border_mode, basestring):
            # conv bricks barf on list-type shape arguments :/
            border_mode = tuple(border_mode)
        activation = bricks.NormalizedActivation(
            name="activation", batch_normalize=batch_normalize)
        layer = conv_module.ConvolutionalActivation(
            name=name,
            activation=activation.apply,
            filter_size=tuple(layer_spec.pop("size", (1, ) * ndim)),
            step=tuple(layer_spec.pop("step", (1, ) * ndim)),
            num_filters=layer_spec.pop("num_filters", 1),
            border_mode=border_mode,
            # our activation function will handle the bias
            use_bias=False)
        # sigh. really REALLY do not use biases
        layer.convolution.use_bias = False
        layer.convolution.weights_init = initialization.ConvolutionalInitialization(
            initialization.Orthogonal())
        layer.convolution.biases_init = initialization.Constant(0)
    else:
        # fail fast instead of falling through with `layer` unbound
        raise ValueError("unknown layer type %r" % type_)
    if layer_spec:
        logger.warn("ignoring unknown layer specification keys [%s]" %
                    " ".join(layer_spec.keys()))
    return layer
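
Because construct_cnn_layer pops keys out of layer_spec as it reads them, the
spec dict is consumed in place. A minimal sketch of the implication, with an
illustrative spec:

spec = dict(type="conv", size=(3, 3), num_filters=8, border_mode="half")
layer = construct_cnn_layer(name="conv_0",
                            layer_spec=dict(spec),  # pass a copy
                            conv_module=conv2d, ndim=2,
                            batch_normalize=True)
assert spec  # the original survives; the copy was emptied by pop()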
Example #7
    def __init__(self, input_dim, n_classes, batch_normalize):
        self.input_dim = input_dim
        self.n_classes = n_classes

        self.mlp = masonry.construct_mlp(
            name="mlp",
            activations=[None, bricks.Identity()],
            input_dim=input_dim,
            hidden_dims=[input_dim // 2, self.n_classes],
            batch_normalize=batch_normalize,
            weights_init=initialization.Orthogonal(),
            biases_init=initialization.Constant(0))
        self.softmax = bricks.Softmax()

        self.children = [self.mlp, self.softmax]
Example #8
    def __init__(self, input_dim, n_classes, batch_normalize):
        self.input_dim = input_dim
        self.n_classes = n_classes

        # TODO: use TensorLinear or some such
        self.emitters = [
            masonry.construct_mlp(
                activations=[None, bricks.Identity()],
                input_dim=input_dim,
                hidden_dims=[input_dim // 2, n],
                name="mlp_%i" % i,
                batch_normalize=batch_normalize,
                weights_init=initialization.Orthogonal(),
                biases_init=initialization.Constant(0))
            for i, n in enumerate(self.n_classes)]
        self.softmax = bricks.Softmax()

        self.children = self.emitters + [self.softmax]
Example #9
def construct_mlp(name,
                  hidden_dims,
                  input_dim,
                  batch_normalize,
                  activations=None,
                  weights_init=None,
                  biases_init=None):
    if not hidden_dims:
        return bricks.FeedforwardIdentity(dim=input_dim)

    if not activations:
        activations = [bricks.Rectifier() for dim in hidden_dims]
    elif not isinstance(activations, collections.Iterable):
        activations = [activations] * len(hidden_dims)
    assert len(activations) == len(hidden_dims)

    if not weights_init:
        weights_init = initialization.Orthogonal()
    if not biases_init:
        biases_init = initialization.Constant(0)

    dims = [input_dim] + hidden_dims
    wrapped_activations = [
        bricks.NormalizedActivation(shape=[hidden_dim],
                                    name="activation_%i" % i,
                                    batch_normalize=batch_normalize,
                                    activation=activation)
        for i, (hidden_dim,
                activation) in enumerate(zip(hidden_dims, activations))
    ]
    mlp = bricks.MLP(
        name=name,
        activations=wrapped_activations,
        # biases are handled by our activation function
        use_bias=False,
        dims=dims,
        weights_init=weights_init,
        biases_init=biases_init)
    return mlp
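
A hypothetical call; the name and sizes are illustrative only.

# two ReLU hidden layers sized by hidden_dims; 784 stands in for a
# flattened 28x28 input
mlp = construct_mlp(name="classifier_mlp",
                    hidden_dims=[256, 128],
                    input_dim=784,
                    batch_normalize=True)
mlp.initialize()
# an empty hidden_dims would instead return a FeedforwardIdentity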
Example #10
    def construct_merger(self, n_spatial_dims, n_channels,
                         patch_shape, response_dim, patch_cnn_spec,
                         patch_mlp_spec, merge_mlp_spec,
                         response_mlp_spec, batch_normalize,
                         batch_normalize_patch, **kwargs):
        # construct patch interpretation network
        patch_transforms = []
        if patch_cnn_spec:
            patch_transforms.append(masonry.construct_cnn(
                name="patch_cnn",
                layer_specs=patch_cnn_spec,
                input_shape=patch_shape,
                n_channels=n_channels,
                batch_normalize=batch_normalize_patch))
            shape = patch_transforms[-1].get_dim("output")
        else:
            shape = (n_channels,) + tuple(patch_shape)
        patch_transforms.append(bricks.FeedforwardFlattener(input_shape=shape))
        if patch_mlp_spec:
            patch_transforms.append(masonry.construct_mlp(
                name="patch_mlp",
                hidden_dims=patch_mlp_spec,
                input_dim=patch_transforms[-1].output_dim,
                weights_init=initialization.Orthogonal(),
                biases_init=initialization.Constant(0),
                batch_normalize=batch_normalize_patch))
        self.patch_transform = bricks.FeedforwardSequence(
            [brick.apply for brick in patch_transforms], name="ffs")

        # construct theta interpretation network
        self.merge_mlp = masonry.construct_mlp(
            name="merge_mlp",
            input_dim=2*n_spatial_dims,
            hidden_dims=merge_mlp_spec,
            weights_init=initialization.Orthogonal(),
            biases_init=initialization.Constant(0),
            batch_normalize=batch_normalize)

        # construct what-where merger network
        self.response_merge = bricks.Merge(
            input_names="area patch".split(),
            input_dims=[self.merge_mlp.output_dim,
                        self.patch_transform.output_dim],
            output_dim=response_dim,
            prototype=bricks.Linear(
                use_bias=False,
                weights_init=initialization.Orthogonal(),
                biases_init=initialization.Constant(0)),
            child_prefix="response_merge")
        self.response_merge_activation = bricks.NormalizedActivation(
            shape=[response_dim],
            name="response_merge_activation",
            batch_normalize=batch_normalize)

        self.response_mlp = masonry.construct_mlp(
            name="response_mlp",
            hidden_dims=response_mlp_spec,
            input_dim=response_dim,
            weights_init=initialization.Orthogonal(),
            biases_init=initialization.Constant(0),
            batch_normalize=batch_normalize)

        self.children.extend([
            self.response_merge_activation,
            self.response_merge,
            self.patch_transform,
            self.merge_mlp,
            self.response_mlp])
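
Compared with Example #1, which concatenates the patch and theta features and
feeds them straight into response_mlp, this variant first combines the two
streams through a learned linear bricks.Merge into a response_dim-sized
vector, applies a normalized activation, and only then runs the response MLP.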