def tag_recurrent_dropout(self, variables, recurrent_dropout, rng=None, **hyperparameters):
    from blocks.roles import OUTPUT, has_roles
    ancestors = graph.deep_ancestors(variables)
    for lstm in self.rnn.transitions:
        variables = [
            var for var in ancestors
            if (has_roles(var, [OUTPUT]) and
                lstm in var.tag.annotations and
                var.name.endswith("states"))]
        # get one dropout mask for all time steps.  use the very
        # first state to get the hidden state shape, else we get
        # graph cycles.
        initial_state = util.the(
            [var for var in variables if "initial_state" in var.name])
        mask = util.get_dropout_mask(initial_state.shape,
                                     recurrent_dropout, rng=rng)
        subsequent_states = [
            var for var in variables if "initial_state" not in var.name]
        graph.add_transform(subsequent_states,
                            graph.DropoutTransform("recurrent_dropout", mask=mask),
                            reason="regularization")
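# util.get_dropout_mask is defined elsewhere in the project; a minimal sketch of
# the idea, assuming the second argument is a drop probability and rng is a
# Theano MRG_RandomStreams (the helper name and defaults below are illustrative,
# not the project's actual implementation):
from theano.sandbox.rng_mrg import MRG_RandomStreams

def get_dropout_mask_sketch(shape, drop_prob, rng=None):
    rng = rng or MRG_RandomStreams(seed=1)
    keep_prob = 1.0 - drop_prob
    # sample one Bernoulli mask and rescale by 1/keep_prob so expected
    # activations are unchanged; the same mask is then reused at every time step
    return rng.binomial(shape, p=keep_prob, dtype=theano.config.floatX) / keep_prob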
def apply(self, scope, initial=False):
    if initial:
        if isinstance(scope.x_shape, tuple):
            # for featurelevel_UCF101
            batch_size = scope.x_shape[0].shape[0]
        else:
            batch_size = scope.x_shape.shape[0]
        # condition on initial shrink-to-fit patch
        scope.raw_location = T.alloc(T.cast(0.0, floatX),
                                     batch_size, self.cropper.n_spatial_dims)
        scope.raw_scale = T.zeros_like(scope.raw_location)
        scope.previous_states = self.rnn.initial_states(batch_size, as_dict=True)
    else:
        self.locate(scope)
    self.map_to_input_space(scope)
    scope.patch, scope.savings = self.cropper.apply(
        scope.x, scope.x_shape, scope.true_location, scope.true_scale)
    graph.add_transform([scope.patch],
                        graph.WhiteNoiseTransform("patch_std"),
                        reason="regularization")
    scope.response = self.response_mlp.apply(
        T.concatenate([
            self.patch_transform.apply(scope.patch),
            self.merge_mlp.apply(
                T.concatenate([scope.raw_location, scope.raw_scale], axis=1)),
        ], axis=1))
    embedding = self.embedder.apply(scope.response)
    scope.rnn_inputs = dict(inputs=embedding, **scope.previous_states)
    scope.rnn_outputs = self.rnn.apply(iterate=False, as_dict=True,
                                       **scope.rnn_inputs)
    return scope
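# graph.WhiteNoiseTransform is project-specific; a rough sketch of the effect it
# has on a tagged variable, assuming it adds zero-mean Gaussian noise whose
# standard deviation is controlled by the hyperparameter named in the tag
# ("patch_std" above).  The helper name and signature are illustrative only:
def white_noise_sketch(var, std, rng):
    # rng is assumed to be a theano.sandbox.rng_mrg.MRG_RandomStreams instance
    return var + rng.normal(var.shape, std=std, dtype=var.dtype)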
def apply(self, input_):
    aggregate_axes = [0] + [1 + i for i, b in enumerate(self.broadcastable) if b]
    # NOTE: don't put batch_stats on self because apply may be
    # called multiple times
    batch_stats = dict(
        (stat, getattr(input_, stat)(axis=aggregate_axes, keepdims=True))
        for stat in self.stats)
    for stat, role in self.roles.items():
        graph.add_transform(
            [batch_stats[stat]],
            graph.ConstantTransform(
                # adding zero to ensure it's a TensorType(float32, row)
                # just like the corresponding batch_stat, rather than a
                # CudaNdarray(float32, row). -__-
                0 + T.patternbroadcast(
                    self.population_stats[stat],
                    [True] + self.broadcastable)),
            reason="population_normalization")
        # make the batch statistics identifiable to get_updates() below
        add_role(batch_stats[stat], self.roles[stat])
        batch_stats[stat] = self.annotated_statistic(batch_stats[stat])
    gamma = T.patternbroadcast(self.gamma, [True] + self.broadcastable)
    beta = T.patternbroadcast(self.beta, [True] + self.broadcastable)
    return theano.tensor.nnet.bn.batch_normalization(
        inputs=input_, gamma=gamma, beta=beta,
        mean=batch_stats["mean"],
        std=T.sqrt(batch_stats["var"] + self.epsilon))
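# The get_updates() mentioned above is not shown here; a hypothetical sketch of
# how it could recover the role-tagged batch statistics from a graph and turn
# them into exponential-moving-average updates of the population statistics.
# The method name, the alpha hyperparameter, and the EMA rule are assumptions:
def get_updates_sketch(self, variables, alpha=0.05):
    from blocks.roles import has_roles
    updates = []
    for stat, role in self.roles.items():
        # find the annotated batch statistics among the ancestors of the outputs
        batch_stats = [var for var in theano.gof.graph.ancestors(variables)
                       if has_roles(var, [role])]
        for batch_stat in batch_stats:
            updates.append((self.population_stats[stat],
                            (1 - alpha) * self.population_stats[stat]
                            + alpha * batch_stat))
    return updates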
def tag_dropout(self, variables, rng=None, **hyperparameters):
    from blocks.roles import INPUT
    from blocks.filter import VariableFilter
    bricks_ = [brick for brick in util.all_bricks([self.mlp])
               if isinstance(brick, bricks.Linear)]
    variables = (VariableFilter(roles=[INPUT], bricks=bricks_)
                 (theano.gof.graph.ancestors(variables)))
    graph.add_transform(
        variables,
        graph.DropoutTransform("classifier_dropout", rng=rng),
        reason="regularization")
def tag_dropout(self, variables, rng=None, **hyperparameters):
    from blocks.roles import INPUT
    from blocks.filter import VariableFilter
    # NOTE: the fixed seed below overrides any rng passed in by the caller
    rng = util.get_rng(seed=1)
    bricks_ = [brick for brick in util.all_bricks(self.emitters)
               if isinstance(brick, bricks.Linear)]
    variables = (VariableFilter(roles=[INPUT], bricks=bricks_)
                 (theano.gof.graph.ancestors(variables)))
    graph.add_transform(
        variables,
        graph.DropoutTransform("classifier_dropout", rng=rng),
        reason="regularization")
def tag_recurrent_weight_noise(self, variables, rng=None, **hyperparameters):
    variables = [
        var for var in graph.deep_ancestors(variables)
        if var.name == "weight_noise_goes_here"]
    graph.add_transform(variables,
                        graph.WhiteNoiseTransform("recurrent_weight_noise", rng=rng),
                        reason="regularization")
def locate(self, scope, initial=False):
    scope.theta = self.theta_from_area.apply(
        self.locate_mlp.apply(
            scope.previous_states[self.attention_state_name]))
    location, scale = (scope.theta[:, :self.n_spatial_dims],
                       scope.theta[:, self.n_spatial_dims:])
    graph.add_transform([location],
                        graph.WhiteNoiseTransform("location_std"),
                        reason="regularization")
    graph.add_transform([scale],
                        graph.WhiteNoiseTransform("scale_std"),
                        reason="regularization")
    scope.raw_location = location.copy()
    scope.raw_scale = scale.copy()
def tag_convnet_dropout(outputs, rng=None, **kwargs):
    from blocks.roles import has_roles, OUTPUT
    cnn_outputs = OrderedDict()
    for var in theano.gof.graph.ancestors(outputs):
        if (has_roles(var, [OUTPUT]) and
                util.annotated_by_a(util.get_convolution_classes(), var)):
            cnn_outputs.setdefault(util.get_path(var), []).append(var)
    unique_outputs = []
    for path, vars in cnn_outputs.items():
        vars = util.dedup(vars, equal=util.equal_computations)
        unique_outputs.extend(vars)
    graph.add_transform(unique_outputs,
                        graph.DropoutTransform("convnet_dropout", rng=rng),
                        reason="regularization")
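# util.dedup and util.equal_computations are project helpers; a plausible sketch,
# assuming equal_computations reduces to Theano's graph comparison of two single
# variables (the names and signatures below are illustrative):
def dedup_sketch(variables, equal):
    unique = []
    for var in variables:
        if not any(equal(var, other) for other in unique):
            unique.append(var)
    return unique

def equal_computations_sketch(a, b):
    from theano.gof.graph import equal_computations
    return equal_computations([a], [b])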
def tag_convnet_dropout(outputs, rng=None, **kwargs):
    from blocks.roles import has_roles, OUTPUT
    cnn_outputs = OrderedDict()
    for var in theano.gof.graph.ancestors(outputs):
        if (has_roles(var, [OUTPUT]) and
                util.annotated_by_a(util.get_convolution_classes(), var)):
            cnn_outputs.setdefault(util.get_path(var), []).append(var)
    unique_outputs = []
    for path, vars in cnn_outputs.items():
        vars = util.dedup(vars, equal=util.equal_computations)
        unique_outputs.append(util.the(vars))
    graph.add_transform(
        unique_outputs,
        graph.DropoutTransform("convnet_dropout", rng=rng),
        reason="regularization")
def tag_attention_dropout(self, variables, rng=None, **hyperparameters):
    from blocks.roles import INPUT, has_roles
    bricks_ = [
        brick for brick in util.all_bricks([self.patch_transform])
        if isinstance(brick, (bricks.Linear,
                              conv2d.Convolutional,
                              conv3d.Convolutional))]
    variables = [
        var for var in graph.deep_ancestors(variables)
        if (has_roles(var, [INPUT]) and
            any(brick in var.tag.annotations for brick in bricks_))]
    graph.add_transform(variables,
                        graph.DropoutTransform("attention_dropout", rng=rng),
                        reason="regularization")