def build_model(images, labels):
    # Construct a bottom convolutional sequence
    bottom_conv_sequence = convolutional_sequence((3, 3), 16, (160, 160))
    bottom_conv_sequence._push_allocation_config()

    # Flatten layer
    flattener = Flattener()

    # Construct a top MLP
    conv_out_dim = numpy.prod(bottom_conv_sequence.get_dim('output'))
    #top_mlp = MLP([Rectifier(name='non_linear_9'), Softmax(name='non_linear_11')],
    #              [conv_out_dim, 1024, 10],
    #              weights_init=IsotropicGaussian(), biases_init=Constant(0))
    top_mlp = BatchNormalizedMLP([Rectifier(name='non_linear_9'),
                                  Softmax(name='non_linear_11')],
                                 [conv_out_dim, 1024, 10],
                                 weights_init=IsotropicGaussian(),
                                 biases_init=Constant(0))

    # Construct feedforward sequence
    ss_seq = FeedforwardSequence([bottom_conv_sequence.apply,
                                  flattener.apply,
                                  top_mlp.apply])
    ss_seq.push_initialization_config()
    ss_seq.initialize()

    prediction = ss_seq.apply(images)
    cost_noreg = CategoricalCrossEntropy().apply(labels.flatten(), prediction)

    # Add L2 regularization on the two MLP weight matrices
    selector = Selector([top_mlp])
    Ws = selector.get_parameters('W')
    mlp_brick_name = 'batchnormalizedmlp'
    W0 = Ws['/%s/linear_0.W' % mlp_brick_name]
    W1 = Ws['/%s/linear_1.W' % mlp_brick_name]
    cost = cost_noreg + .01 * (W0 ** 2).mean() + .01 * (W1 ** 2).mean()

    return cost
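# --- Usage sketch (not part of the original source) ---
# A minimal example, under assumptions, of how the build_model above could be
# wired into a Blocks training loop. The imports are the standard
# Blocks/Theano ones; `example_training_setup`, `train_stream` (a Fuel data
# stream), and the learning rate are hypothetical placeholders.
def example_training_setup(train_stream):
    import theano.tensor as tensor
    from blocks.algorithms import GradientDescent, Scale
    from blocks.graph import ComputationGraph
    from blocks.main_loop import MainLoop

    images = tensor.tensor4('images')  # (batch, channel, height, width)
    labels = tensor.imatrix('labels')  # integer class labels
    cost = build_model(images, labels)
    cost.name = 'cost'

    # Collect every trainable parameter from the annotated graph and
    # descend on the regularized cost.
    cg = ComputationGraph(cost)
    algorithm = GradientDescent(cost=cost, parameters=cg.parameters,
                                step_rule=Scale(learning_rate=0.01))
    return MainLoop(algorithm=algorithm, data_stream=train_stream)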
def build_model(images, labels):
    vgg = VGG(layer='conv4_4')
    vgg.push_initialization_config()
    vgg.initialize()

    tdb = top_direction_block()
    tdb.push_initialization_config()
    tdb.initialize()

    # Construct feedforward sequence
    ss_seq = FeedforwardSequence([vgg.apply, tdb.apply])
    ss_seq.push_initialization_config()
    ss_seq.initialize()

    prediction = ss_seq.apply(images)
    cost = StructuredCost().apply(
        labels, theano.tensor.clip(prediction, 1e-5, 1 - 1e-5))

    cg = ComputationGraph(cost)
    cg_dropout = apply_dropout(
        cg, [VariableFilter(roles=[OUTPUT])(cg.variables)[0]], .5)
    cost_dropout = cg_dropout.outputs[0]

    # Define learned parameters
    selector = Selector([ss_seq])
    W = selector.get_parameters()
    parameters = [v for k, v in W.items()]

    return cost_dropout, parameters
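# --- Usage sketch (not part of the original source) ---
# This variant of build_model returns the dropout cost together with an
# explicit parameter list, so gradients can be taken with respect to exactly
# those parameters rather than everything in the graph. A minimal, assumed
# wiring; `example_gradients` is a hypothetical helper and `images`/`labels`
# are the Theano variables fed to build_model.
def example_gradients(images, labels):
    import theano

    cost_dropout, parameters = build_model(images, labels)
    # Gradient of the dropout-regularized cost w.r.t. the selected
    # parameters only (here: everything under ss_seq).
    grads = theano.grad(cost_dropout, parameters)
    return theano.function([images, labels], [cost_dropout] + grads)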
def build_model(images, labels):
    # Construct a bottom convolutional sequence
    bottom_conv_sequence = convolutional_sequence((3, 3), 64, (150, 150))
    bottom_conv_sequence._push_allocation_config()

    # Flatten layer
    flattener = Flattener()

    # Construct a top MLP
    conv_out_dim = numpy.prod(bottom_conv_sequence.get_dim('output'))
    top_mlp = MLP([LeakyRectifier(name='non_linear_9'),
                   LeakyRectifier(name='non_linear_10'),
                   Softmax(name='non_linear_11')],
                  [conv_out_dim, 2048, 612, 10],
                  weights_init=IsotropicGaussian(),
                  biases_init=Constant(1))

    # Construct feedforward sequence
    ss_seq = FeedforwardSequence([bottom_conv_sequence.apply,
                                  flattener.apply,
                                  top_mlp.apply])
    ss_seq.push_initialization_config()
    ss_seq.initialize()

    prediction = ss_seq.apply(images)
    cost = CategoricalCrossEntropy().apply(labels.flatten(), prediction)

    return cost
class NormalizedActivation(Initializable, Feedforward):
    @lazy(allocation="shape broadcastable".split())
    def __init__(self, shape, broadcastable, activation=None,
                 batch_normalize=False, **kwargs):
        super(NormalizedActivation, self).__init__(**kwargs)
        self.shape = shape
        self.broadcastable = broadcastable
        self.activation = activation or Rectifier()
        self.batch_normalize = batch_normalize

    @property
    def broadcastable(self):
        return self._broadcastable or [False] * len(self.shape)

    @broadcastable.setter
    def broadcastable(self, broadcastable):
        self._broadcastable = broadcastable

    def _allocate(self):
        arghs = dict(shape=self.shape, broadcastable=self.broadcastable)
        sequence = []
        if self.batch_normalize:
            sequence.append(Standardization(**arghs))
            sequence.append(SharedScale(weights_init=Constant(1), **arghs))
        sequence.append(SharedShift(biases_init=Constant(0), **arghs))
        sequence.append(self.activation)
        self.sequence = FeedforwardSequence(
            [brick.apply for brick in sequence], name="ffs")
        self.children = [self.sequence]

    @application(inputs=["input_"], outputs=["output"])
    def apply(self, input_):
        return self.sequence.apply(input_)

    def get_dim(self, name):
        try:
            # Look up the requested dimension by name; fall back to the
            # superclass for anything this brick does not know about.
            return dict(input_=self.shape, output=self.shape)[name]
        except KeyError:
            return super(NormalizedActivation, self).get_dim(name)
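# --- Usage sketch (not part of the original source) ---
# NormalizedActivation chains optional standardization plus learned
# scale/shift bricks in front of an activation. For a convolutional feature
# map, making the height and width axes broadcastable shares the
# normalization statistics and parameters across spatial positions, as in
# standard conv batch norm. The shapes and `example_normalized_activation`
# helper below are illustrative assumptions.
def example_normalized_activation(x):
    # x: Theano variable holding a (batch, 64, 32, 32) feature map.
    na = NormalizedActivation(shape=(64, 32, 32),
                              broadcastable=[False, True, True],
                              activation=Rectifier(),
                              batch_normalize=True)
    na.initialize()
    return na.apply(x)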
def build_model(images, labels):
    vgg = VGG(layer='conv3_4')
    vgg.push_initialization_config()
    vgg.initialize()

    sb = SubstractBatch()  # note: currently unused below

    # Construct a bottom convolutional sequence
    layers = [
        Convolutional(filter_size=(3, 3), num_filters=100, use_bias=True,
                      tied_biases=True, name='final_conv0'),
        BatchNormalization(name='batchnorm_1'),
        Rectifier(name='final_conv0_act'),
        Convolutional(filter_size=(3, 3), num_filters=100, use_bias=True,
                      tied_biases=True, name='final_conv1'),
        BatchNormalization(name='batchnorm_2'),
        Rectifier(name='final_conv1_act'),
        MaxPooling(pooling_size=(2, 2), name='maxpool_final'),
    ]
    bottom_conv_sequence = ConvolutionalSequence(
        layers, num_channels=256, image_size=(40, 40),
        biases_init=Constant(0.), weights_init=IsotropicGaussian(0.01))
    bottom_conv_sequence._push_allocation_config()

    # Flatten layer
    flattener = Flattener()

    # Construct a top MLP
    conv_out_dim = numpy.prod(bottom_conv_sequence.get_dim('output'))
    print('dim output conv:', bottom_conv_sequence.get_dim('output'))
    # conv_out_dim = 20 * 40 * 40
    top_mlp = BatchNormalizedMLP(
        [Rectifier(name='non_linear_9'), Softmax(name='non_linear_11')],
        [conv_out_dim, 1024, 10],
        weights_init=IsotropicGaussian(), biases_init=Constant(0))

    # Construct feedforward sequence
    ss_seq = FeedforwardSequence([vgg.apply, bottom_conv_sequence.apply,
                                  flattener.apply, top_mlp.apply])
    ss_seq.push_initialization_config()
    ss_seq.initialize()

    prediction = ss_seq.apply(images)
    cost_noreg = CategoricalCrossEntropy().apply(labels.flatten(), prediction)

    # Add L2 regularization on the two top-MLP weight matrices
    selector = Selector([top_mlp])
    Ws = selector.get_parameters('W')
    mlp_brick_name = 'batchnormalizedmlp'
    W0 = Ws['/%s/linear_0.W' % mlp_brick_name]
    W1 = Ws['/%s/linear_1.W' % mlp_brick_name]
    cost = cost_noreg + .0001 * (W0 ** 2).sum() + .001 * (W1 ** 2).sum()

    # Define learned parameters: a top-only group and a top + VGG group
    selector = Selector([ss_seq])
    Ws = selector.get_parameters('W')
    bs = selector.get_parameters('b')
    BNSCs = selector.get_parameters('batch_norm_scale')
    BNSHs = selector.get_parameters('batch_norm_shift')
    parameters_top = []
    parameters_top += [v for k, v in Ws.items()]
    parameters_top += [v for k, v in bs.items()]
    parameters_top += [v for k, v in BNSCs.items()]
    parameters_top += [v for k, v in BNSHs.items()]

    selector = Selector([vgg])
    convs = selector.get_parameters()
    parameters_all = parameters_top + [v for k, v in convs.items()]

    return cost, [parameters_top, parameters_all]
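# --- Usage sketch (not part of the original source) ---
# The build_model above returns two parameter groups: `parameters_top` (the
# new layers on top of VGG) and `parameters_all` (top layers plus the VGG
# convolutions). One plausible fine-tuning schedule, sketched with standard
# Blocks APIs; `example_finetune_algorithms` and the learning rates are
# assumptions, not part of the original source.
def example_finetune_algorithms(cost, parameter_groups):
    from blocks.algorithms import GradientDescent, Scale

    parameters_top, parameters_all = parameter_groups
    # Phase 1: adapt only the randomly initialized top layers.
    warmup = GradientDescent(cost=cost, parameters=parameters_top,
                             step_rule=Scale(learning_rate=0.01))
    # Phase 2: unfreeze the pretrained VGG features at a smaller step size.
    finetune = GradientDescent(cost=cost, parameters=parameters_all,
                               step_rule=Scale(learning_rate=0.001))
    return warmup, finetune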