def tag_recurrent_dropout(self, variables, recurrent_dropout, rng=None,
                          **hyperparameters):
    from blocks.roles import OUTPUT, has_roles
    ancestors = graph.deep_ancestors(variables)
    for lstm in self.rnn.transitions:
        states = [
            var for var in ancestors
            if (has_roles(var, [OUTPUT]) and
                lstm in var.tag.annotations and
                var.name.endswith("states"))]
        # get one dropout mask for all time steps.  use the very
        # first state to get the hidden state shape, else we get
        # graph cycles.
        initial_state = util.the(
            [var for var in states if "initial_state" in var.name])
        mask = util.get_dropout_mask(initial_state.shape,
                                     recurrent_dropout, rng=rng)
        subsequent_states = [
            var for var in states if "initial_state" not in var.name]
        graph.add_transform(
            subsequent_states,
            graph.DropoutTransform("recurrent_dropout", mask=mask),
            reason="regularization")
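
# A minimal sketch of what the util.get_dropout_mask helper used above
# might look like (hypothetical; the real implementation lives in util and
# may differ). Sampling one binomial mask and rescaling by the keep
# probability is the standard "inverted dropout" construction, so the
# expected magnitude of the masked activations is unchanged.
def _get_dropout_mask_sketch(shape, dropout_probability, rng=None):
    from theano.sandbox.rng_mrg import MRG_RandomStreams
    # assumed: rng is a Theano RandomStreams-like object
    rng = rng or MRG_RandomStreams(seed=1)
    keep_probability = 1 - dropout_probability
    mask = rng.binomial(size=shape, p=keep_probability, n=1,
                        dtype=theano.config.floatX)
    # rescale so that E[mask * x] == x
    return mask / keep_probability
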
def tag_dropout(self, variables, rng=None, **hyperparameters):
    from blocks.roles import INPUT
    from blocks.filter import VariableFilter
    # fall back to a fixed seed only when no generator was provided
    rng = rng or util.get_rng(seed=1)
    bricks_ = [brick for brick in util.all_bricks(self.emitters)
               if isinstance(brick, bricks.Linear)]
    variables = (VariableFilter(roles=[INPUT], bricks=bricks_)
                 (theano.gof.graph.ancestors(variables)))
    graph.add_transform(
        variables,
        graph.DropoutTransform("classifier_dropout", rng=rng),
        reason="regularization")
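
# Hedged sketch of the graph.DropoutTransform used throughout this file
# (hypothetical; the actual class lives in the graph module and likely
# carries more state). Conceptually it maps each tagged variable to a
# masked copy of itself, either with a shared precomputed mask (as in the
# recurrent dropout above) or with a fresh mask per variable.
class _DropoutTransformSketch(object):
    def __init__(self, hyperparameter_name, rng=None, mask=None):
        # name under which the dropout rate is looked up when the
        # transform is applied
        self.hyperparameter_name = hyperparameter_name
        self.rng = rng
        self.mask = mask

    def apply(self, var, dropout_probability):
        mask = self.mask
        if mask is None:
            mask = _get_dropout_mask_sketch(
                var.shape, dropout_probability, rng=self.rng)
        return var * mask
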
def tag_convnet_dropout(outputs, rng=None, **kwargs):
    from collections import OrderedDict
    from blocks.roles import OUTPUT, has_roles
    # group convolutional outputs by their path in the brick hierarchy
    cnn_outputs = OrderedDict()
    for var in theano.gof.graph.ancestors(outputs):
        if (has_roles(var, [OUTPUT]) and
                util.annotated_by_a(util.get_convolution_classes(), var)):
            cnn_outputs.setdefault(util.get_path(var), []).append(var)
    # drop duplicate computations so each output is masked only once
    unique_outputs = []
    for path, vars in cnn_outputs.items():
        vars = util.dedup(vars, equal=util.equal_computations)
        unique_outputs.extend(vars)
    graph.add_transform(
        unique_outputs,
        graph.DropoutTransform("convnet_dropout", rng=rng),
        reason="regularization")
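
# Sketch of the deduplication helper assumed above (hypothetical
# signature; the real util.dedup may differ). Variables reached through
# different paths can denote the same expression, and masking the same
# computation twice would square the dropout rate; comparing computations
# structurally keeps only one copy. In old Theano,
# theano.scan_module.scan_utils.equal_computations compares two lists of
# variables structurally and could back util.equal_computations.
def _dedup_sketch(variables, equal):
    unique = []
    for var in variables:
        # keep var only if no structurally equal variable was kept already
        if not any(equal(var, other) for other in unique):
            unique.append(var)
    return unique
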
def tag_attention_dropout(self, variables, rng=None, **hyperparameters):
    from blocks.roles import INPUT, has_roles
    bricks_ = [
        brick for brick in util.all_bricks([self.patch_transform])
        if isinstance(brick, (bricks.Linear,
                              conv2d.Convolutional,
                              conv3d.Convolutional))]
    variables = [
        var for var in graph.deep_ancestors(variables)
        if (has_roles(var, [INPUT]) and
            any(brick in var.tag.annotations for brick in bricks_))]
    graph.add_transform(
        variables,
        graph.DropoutTransform("attention_dropout", rng=rng),
        reason="regularization")
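
# Hedged sketch of the util.all_bricks traversal used above (hypothetical;
# the real helper may differ). Blocks bricks expose their sub-bricks via
# the .children attribute, so a depth-first walk over that attribute
# yields the whole hierarchy under the given roots, roots included.
def _all_bricks_sketch(roots):
    stack, result = list(roots), []
    while stack:
        brick = stack.pop()
        result.append(brick)
        stack.extend(brick.children)
    return result
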