Example #1
    def tag_recurrent_dropout(self,
                              variables,
                              recurrent_dropout,
                              rng=None,
                              **hyperparameters):
        from blocks.roles import OUTPUT, has_roles
        ancestors = graph.deep_ancestors(variables)
        for lstm in self.rnn.transitions:
            variables = [
                var for var in ancestors
                if (has_roles(var, [OUTPUT]) and lstm in var.tag.annotations
                    and var.name.endswith("states"))
            ]

            # get one dropout mask for all time steps.  use the very
            # first state to get the hidden state shape, else we get
            # graph cycles.
            initial_state = util.the(
                [var for var in variables if "initial_state" in var.name])
            mask = util.get_dropout_mask(initial_state.shape,
                                         recurrent_dropout,
                                         rng=rng)

            subsequent_states = [
                var for var in variables if "initial_state" not in var.name
            ]
            graph.add_transform(subsequent_states,
                                graph.DropoutTransform("recurrent_dropout",
                                                       mask=mask),
                                reason="regularization")
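
The core idea in Example #1 is that a single dropout mask is sampled from the shape of the initial hidden state (using the initial state avoids graph cycles) and then reused for the states at every time step. A rough stand-alone sketch of that effect in plain NumPy, with made-up sizes and an illustrative drop probability instead of the project's graph/util helpers:

import numpy as np

rng = np.random.RandomState(1)
batch_size, hidden_dim, n_steps = 4, 8, 5
recurrent_dropout = 0.5
keep_prob = 1.0 - recurrent_dropout

# Sample the mask once, from the (batch, hidden) shape of the initial state,
# and rescale so the expected activation is unchanged.
mask = rng.binomial(n=1, p=keep_prob, size=(batch_size, hidden_dim)) / keep_prob

def rnn_step(state):
    return np.tanh(state)  # stand-in for the real recurrent transition

state = np.zeros((batch_size, hidden_dim))
for t in range(n_steps):
    state = rnn_step(state) * mask  # the same mask is applied at every step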
Example #2
 def apply(self, scope, initial=False):
     if initial:
         if isinstance(scope.x_shape, tuple):
             # for featurelevel_UCF101
             batch_size = scope.x_shape[0].shape[0]
         else:
             batch_size = scope.x_shape.shape[0]
         # condition on initial shrink-to-fit patch
         scope.raw_location = T.alloc(T.cast(0.0, floatX), batch_size,
                                      self.cropper.n_spatial_dims)
         scope.raw_scale = T.zeros_like(scope.raw_location)
         scope.previous_states = self.rnn.initial_states(batch_size,
                                                         as_dict=True)
     else:
         self.locate(scope)
     self.map_to_input_space(scope)
     scope.patch, scope.savings = self.cropper.apply(
         scope.x, scope.x_shape, scope.true_location, scope.true_scale)
     graph.add_transform([scope.patch],
                         graph.WhiteNoiseTransform("patch_std"),
                         reason="regularization")
     scope.response = self.response_mlp.apply(
         T.concatenate([
             self.patch_transform.apply(scope.patch),
             self.merge_mlp.apply(
                 T.concatenate([scope.raw_location, scope.raw_scale],
                               axis=1)),
         ],
                       axis=1))
     embedding = self.embedder.apply(scope.response)
     scope.rnn_inputs = dict(inputs=embedding, **scope.previous_states)
     scope.rnn_outputs = self.rnn.apply(iterate=False,
                                        as_dict=True,
                                        **scope.rnn_inputs)
     return scope
Example #3
    def apply(self, input_):
        aggregate_axes = [0] + [1 + i for i, b in enumerate(self.broadcastable) if b]
        # NOTE: don't put batch_stats on self because apply may be
        # called multiple times
        batch_stats = dict(
            (stat, getattr(input_, stat)(axis=aggregate_axes,
                                         keepdims=True))
            for stat in self.stats)

        for stat, role in self.roles.items():
            graph.add_transform([batch_stats[stat]],
                                graph.ConstantTransform(
                                    # adding zero to ensure it's a TensorType(float32, row)
                                    # just like the corresponding batch_stat, rather than a
                                    # CudaNdarray(float32, row).  -__-
                                    0 + T.patternbroadcast(
                                        self.population_stats[stat],
                                        [True] + self.broadcastable)),
                                reason="population_normalization")

            # make the batch statistics identifiable to get_updates() below
            add_role(batch_stats[stat], self.roles[stat])
            batch_stats[stat] = self.annotated_statistic(batch_stats[stat])

        gamma = T.patternbroadcast(self.gamma, [True] + self.broadcastable)
        beta = T.patternbroadcast(self.beta, [True] + self.broadcastable)
        return theano.tensor.nnet.bn.batch_normalization(
            inputs=input_, gamma=gamma, beta=beta,
            mean=batch_stats["mean"],
            std=T.sqrt(batch_stats["var"] + self.epsilon))
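
At its core, Example #3 computes the usual batch normalization y = gamma * (x - mean) / sqrt(var + eps) + beta, where mean and var are batch statistics (later swappable for population statistics via the ConstantTransform). A minimal NumPy sketch of just that formula, with made-up shapes:

import numpy as np

rng = np.random.RandomState(0)
x = rng.randn(16, 10).astype("float32")   # (batch, features)
gamma, beta, eps = 1.0, 0.0, 1e-5

mean = x.mean(axis=0, keepdims=True)      # batch statistics, kept broadcastable
var = x.var(axis=0, keepdims=True)
y = gamma * (x - mean) / np.sqrt(var + eps) + beta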
Example #4
 def tag_dropout(self, variables, rng=None, **hyperparameters):
     from blocks.roles import INPUT
     from blocks.filter import VariableFilter
     bricks_ = [brick for brick in util.all_bricks([self.mlp])
                if isinstance(brick, bricks.Linear)]
     variables = (VariableFilter(roles=[INPUT], bricks=bricks_)
                  (theano.gof.graph.ancestors(variables)))
     graph.add_transform(
         variables,
         graph.DropoutTransform("classifier_dropout", rng=rng),
         reason="regularization")
Example #5
 def tag_dropout(self, variables, rng=None, **hyperparameters):
     from blocks.roles import INPUT
     from blocks.filter import VariableFilter
     rng = util.get_rng(seed=1)
     bricks_ = [brick for brick in util.all_bricks(self.emitters)
                if isinstance(brick, bricks.Linear)]
     variables = (VariableFilter(roles=[INPUT], bricks=bricks_)
                  (theano.gof.graph.ancestors(variables)))
     graph.add_transform(
         variables,
         graph.DropoutTransform("classifier_dropout", rng=rng),
         reason="regularization")
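
Examples #4 and #5 differ mainly in that Example #5 targets self.emitters and overrides the passed-in rng with a fixed seed, making the dropout masks reproducible. Conceptually the transform is plain inverted dropout on the inputs of the Linear bricks; a toy NumPy sketch with illustrative sizes and keep probability:

import numpy as np

rng = np.random.RandomState(1)            # fixed seed, as in Example #5
x = rng.randn(4, 6).astype("float32")     # inputs feeding a linear layer
W = rng.randn(6, 3).astype("float32")
keep_prob = 0.5

mask = rng.binomial(n=1, p=keep_prob, size=x.shape).astype("float32")
h = (x * mask / keep_prob).dot(W)         # dropout applied to the layer's input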
Example #6
 def tag_recurrent_weight_noise(self,
                                variables,
                                rng=None,
                                **hyperparameters):
     variables = [
         var for var in graph.deep_ancestors(variables)
         if var.name == "weight_noise_goes_here"
     ]
     graph.add_transform(variables,
                         graph.WhiteNoiseTransform("recurrent_weight_noise",
                                                   rng=rng),
                         reason="regularization")
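
Example #6 tags placeholder variables (named "weight_noise_goes_here") with a WhiteNoiseTransform. The underlying regularizer is additive Gaussian noise on the recurrent weights during training; a toy NumPy sketch with an arbitrary noise standard deviation:

import numpy as np

rng = np.random.RandomState(0)
W = rng.randn(8, 8).astype("float32")     # a recurrent weight matrix
noise_std = 0.05                          # illustrative value only

W_noisy = W + noise_std * rng.randn(*W.shape).astype("float32")
# Training uses W_noisy; at test time the clean W is used.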
Example #7
 def locate(self, scope, initial=False):
     scope.theta = self.theta_from_area.apply(
         self.locate_mlp.apply(
             scope.previous_states[self.attention_state_name]))
     location, scale = (scope.theta[:, :self.n_spatial_dims],
                        scope.theta[:, self.n_spatial_dims:])
     graph.add_transform([location],
                         graph.WhiteNoiseTransform("location_std"),
                         reason="regularization")
     graph.add_transform([scale],
                         graph.WhiteNoiseTransform("scale_std"),
                         reason="regularization")
     scope.raw_location = location.copy()
     scope.raw_scale = scale.copy()
Example #8
def tag_convnet_dropout(outputs, rng=None, **kwargs):
    from blocks.roles import has_roles, OUTPUT
    cnn_outputs = OrderedDict()
    for var in theano.gof.graph.ancestors(outputs):
        if (has_roles(var, [OUTPUT])
                and util.annotated_by_a(util.get_convolution_classes(), var)):
            cnn_outputs.setdefault(util.get_path(var), []).append(var)
    unique_outputs = []
    for path, vars in cnn_outputs.items():
        vars = util.dedup(vars, equal=util.equal_computations)
        unique_outputs.extend(vars)
    graph.add_transform(unique_outputs,
                        graph.DropoutTransform("convnet_dropout", rng=rng),
                        reason="regularization")
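
Example #8 (and Example #9 below) first groups candidate convnet outputs by path and then drops variables that represent the same computation, so each distinct output is transformed only once; Example #9 additionally expects exactly one survivor per path. A toy sketch of that grouping-and-dedup pattern, using hypothetical stand-ins for util.get_path, util.dedup, and util.equal_computations:

from collections import OrderedDict

def unique_per_path(variables, get_path, equivalent):
    groups = OrderedDict()
    for var in variables:
        groups.setdefault(get_path(var), []).append(var)
    unique = []
    for _path, group in groups.items():
        kept = []
        for var in group:
            # keep a variable only if no equivalent one was kept already
            if not any(equivalent(var, other) for other in kept):
                kept.append(var)
        unique.extend(kept)
    return unique

# e.g. unique_per_path(["a", "a", "b"], get_path=len, equivalent=str.__eq__)
# returns ["a", "b"]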
Example #9
def tag_convnet_dropout(outputs, rng=None, **kwargs):
    from blocks.roles import has_roles, OUTPUT
    cnn_outputs = OrderedDict()
    for var in theano.gof.graph.ancestors(outputs):
        if (has_roles(var, [OUTPUT]) and util.annotated_by_a(
                util.get_convolution_classes(), var)):
            cnn_outputs.setdefault(util.get_path(var), []).append(var)
    unique_outputs = []
    for path, vars in cnn_outputs.items():
        vars = util.dedup(vars, equal=util.equal_computations)
        unique_outputs.append(util.the(vars))
    graph.add_transform(
        unique_outputs,
        graph.DropoutTransform("convnet_dropout", rng=rng),
        reason="regularization")
Example #10
 def tag_attention_dropout(self, variables, rng=None, **hyperparameters):
     from blocks.roles import INPUT, has_roles
     bricks_ = [
         brick for brick in util.all_bricks([self.patch_transform])
         if isinstance(brick, (bricks.Linear, conv2d.Convolutional,
                               conv3d.Convolutional))
     ]
     variables = [
         var for var in graph.deep_ancestors(variables)
         if (has_roles(var, [INPUT]) and any(brick in var.tag.annotations
                                             for brick in bricks_))
     ]
     graph.add_transform(variables,
                         graph.DropoutTransform("attention_dropout",
                                                rng=rng),
                         reason="regularization")