def test_batch_normalization_inside_convolutional_sequence():
    """Check that BatchNormalization bricks compose inside a ConvolutionalSequence.

    Freshly initialized population statistics make each BN layer an identity
    transform, so the BN sequence must match an identical sequence without
    BN; tripling the last BN's population stdev must divide the output by 3.
    """
    bn_seq = ConvolutionalSequence(
        [Convolutional(filter_size=(3, 3), num_filters=4),
         BatchNormalization(broadcastable=(False, True, True)),
         AveragePooling(pooling_size=(2, 2)),
         BatchNormalization(broadcastable=(False, False, False)),
         MaxPooling(pooling_size=(2, 2), step=(1, 1))],
        weights_init=Constant(1.), biases_init=Constant(2.),
        image_size=(10, 8), num_channels=9)
    plain_seq = ConvolutionalSequence(
        [Convolutional(filter_size=(3, 3), num_filters=4),
         AveragePooling(pooling_size=(2, 2)),
         MaxPooling(pooling_size=(2, 2), step=(1, 1))],
        weights_init=Constant(1.), biases_init=Constant(2.),
        image_size=(10, 8), num_channels=9)
    bn_seq.initialize()
    plain_seq.initialize()
    rng = numpy.random.RandomState((2015, 12, 17))
    input_ = random_unif(rng, (2, 9, 10, 8))
    x = theano.tensor.tensor4()
    y_with_bn = bn_seq.apply(x)
    y_without_bn = plain_seq.apply(x)
    yield (assert_equal, y_with_bn.eval({x: input_}),
           y_without_bn.eval({x: input_}))
    # Scaling the last BN layer's population stdev by 3 should scale the
    # whole output by 1/3.
    std = bn_seq.children[-2].population_stdev
    std.set_value(3 * std.get_value(borrow=True))
    yield (assert_equal, y_with_bn.eval({x: input_}),
           y_without_bn.eval({x: input_}) / 3.)
class EncoderMapping(Initializable):
    """Encoder brick mapping an image plus an embedding to mu/logsigma.

    Parameters
    ----------
    layers : :class:`list`
        List of bricks; all but the last form the pre-encoder, the last
        one is applied after concatenating the embedding.
    num_channels : :class:`int`
        Number of input channels.
    image_size : :class:`tuple`
        Spatial size of the input image.
    n_emb : :class:`int`
        Dimensionality of the embedding.
    use_bias : :class:`bool`
        Whether the convolutions use biases.
    """
    def __init__(self, layers, num_channels, image_size, n_emb,
                 use_bias=False, **kwargs):
        self.layers = layers
        self.num_channels = num_channels
        self.image_size = image_size
        self.pre_encoder = ConvolutionalSequence(
            layers=layers[:-1],
            num_channels=num_channels,
            image_size=image_size,
            use_bias=use_bias,
            name='encoder_conv_mapping')
        self.pre_encoder.allocate()
        # The post-encoder sees the pre-encoder's feature maps stacked with
        # the embedding along the channel axis.
        n_channels = n_emb + self.pre_encoder.get_dim('output')[0]
        self.post_encoder = ConvolutionalSequence(
            layers=[layers[-1]],
            num_channels=n_channels,
            image_size=(1, 1),
            use_bias=use_bias)
        kwargs.setdefault('children', []).extend(
            [self.pre_encoder, self.post_encoder])
        super(EncoderMapping, self).__init__(**kwargs)

    @application(inputs=['x', 'y'], outputs=['output'])
    def apply(self, x, y):
        """Return mu and logsigma."""
        hidden = self.pre_encoder.apply(x)
        # Concatenate the embedding with the features channel-wise.
        joined = tensor.concatenate([hidden, y], axis=1)
        return self.post_encoder.apply(joined)
def test_convolutional_sequence():
    """A ConvolutionalSequence pushes channel counts through its layers."""
    x = tensor.tensor4('x')
    num_channels = 4
    pooling_size = 3
    batch_size = 5
    activation = Rectifier().apply
    conv = ConvolutionalLayer(activation, (3, 3), 5,
                              (pooling_size, pooling_size),
                              weights_init=Constant(1.),
                              biases_init=Constant(5.))
    conv2 = ConvolutionalActivation(activation, (2, 2), 4,
                                    weights_init=Constant(1.))
    seq = ConvolutionalSequence([conv, conv2], num_channels,
                                image_size=(17, 13))
    seq.push_allocation_config()
    # The first layer inherits the sequence's channel count; the second
    # layer's channels equal the first layer's filter count.
    assert conv.num_channels == 4
    assert conv2.num_channels == 5
    conv2.convolution.use_bias = False
    y = seq.apply(x)
    seq.initialize()
    func = function([x], y)
    x_val = numpy.ones((batch_size, 4, 17, 13), dtype=theano.config.floatX)
    y_val = numpy.ones((batch_size, 4, 4, 3)) * (9 * 4 + 5) * 4 * 5
    assert_allclose(func(x_val), y_val)
def build_conv_layers(self, image=None):
    """Stack Convolutional/MaxPooling/Rectifier triples and apply them.

    Returns the symbolic output and the output dimensions of the
    resulting ConvolutionalSequence.
    """
    if image is None:
        image = T.ftensor4('spectrogram')
    bricks = []
    for index in range(self.layers):
        param = self.params[index]
        convolution = Convolutional(param[0], param[1], param[2])
        convolution.name = "convolution" + str(index)
        pooling = MaxPooling(param[3])
        pooling.name = "maxpooling" + str(index)
        bricks.extend([convolution, pooling, Rectifier()])
    conv_seq = ConvolutionalSequence(
        bricks, self.params[0][2],
        image_size=self.image_size,
        weights_init=IsotropicGaussian(std=0.5, mean=0),
        biases_init=Constant(0))
    conv_seq._push_allocation_config()
    conv_seq.initialize()
    out = conv_seq.apply(image)
    return out, conv_seq.get_dim('output')
def test_convolutional_sequence_with_no_input_size():
    """A sequence without image_size works iff biases are tied.

    With untied biases the bias shape depends on the (unknown) image size,
    so initialization must raise; with tied biases it succeeds.
    """
    # suppose x is outputted by some RNN
    x = tensor.tensor4('x')
    filter_size = (1, 1)
    num_filters = 2
    num_channels = 1
    pooling_size = (1, 1)
    conv = Convolutional(filter_size, num_filters, tied_biases=False,
                         weights_init=Constant(1.),
                         biases_init=Constant(1.))
    act = Rectifier()
    pool = MaxPooling(pooling_size)

    bad_seq = ConvolutionalSequence([conv, act, pool], num_channels,
                                    tied_biases=False)
    # Raw string: '\S' is a regex token, not a Python escape sequence
    # (non-raw '\S' triggers an invalid-escape DeprecationWarning).
    assert_raises_regexp(ValueError, r'Cannot infer bias size \S+',
                         bad_seq.initialize)

    seq = ConvolutionalSequence([conv, act, pool], num_channels,
                                tied_biases=True)
    try:
        seq.initialize()
        out = seq.apply(x)
    except TypeError:
        assert False, "This should have succeeded"
    assert out.ndim == 4
def build_conv_layers(self, image=None):
    """Build the convolution/pooling/rectifier stack and run it on `image`.

    Returns the symbolic output and the sequence's output dimensions.
    """
    input_image = T.ftensor4('spectrogram') if image is None else image
    bricks = []
    for idx in range(self.layers):
        p = self.params[idx]
        conv = Convolutional(p[0], p[1], p[2])
        conv.name = "convolution" + str(idx)
        pool = MaxPooling(p[3])
        pool.name = "maxpooling" + str(idx)
        bricks += [conv, pool, Rectifier()]
    sequence = ConvolutionalSequence(
        bricks, self.params[0][2], image_size=self.image_size,
        weights_init=IsotropicGaussian(std=0.5, mean=0),
        biases_init=Constant(0))
    sequence._push_allocation_config()
    sequence.initialize()
    output = sequence.apply(input_image)
    return output, sequence.get_dim('output')
class Decoder(Initializable):
    """Decoder brick: maps a latent code (concatenated with conditioning
    data) through a ConvolutionalSequence.

    Parameters
    ----------
    layers : list
        Bricks composing the decoder mapping.
    num_channels : int
        Number of input channels (latent plus conditioning).
    image_size : tuple
        Spatial size of the input.
    use_bias : bool
        Whether the convolutions use biases.
    """
    def __init__(self, layers, num_channels, image_size,
                 use_bias=False, **kwargs):
        self.layers = layers
        self.num_channels = num_channels
        self.image_size = image_size
        self.mapping = ConvolutionalSequence(layers=layers,
                                             num_channels=num_channels,
                                             image_size=image_size,
                                             use_bias=use_bias,
                                             name='decoder_mapping')
        kwargs.setdefault('children', []).extend([self.mapping])
        super(Decoder, self).__init__(**kwargs)

    @application(inputs=['z', 'y'], outputs=['outputs'])
    def apply(self, z, y, application_call):
        # Stack the conditioning data onto z along the channel axis.
        return self.mapping.apply(tensor.concatenate([z, y], axis=1))
def inception(image_shape, num_input, conv1, conv2, conv3, conv4, conv5,
              conv6, out, i):
    """Build one GoogLeNet-style inception block on top of `out`.

    Four parallel branches (1x1, 1x1->3x3, 1x1->5x5, pool->1x1) are built
    as separate ConvolutionalSequences and concatenated channel-wise.
    `i` seeds the running index used to give every brick a unique name.
    """
    # Branch 1: 1x1 convolution.
    branch1 = [
        Convolutional(filter_size=(1, 1), num_channels=num_input,
                      num_filters=conv1, image_size=image_shape,
                      border_mode='half', name='conv_{}'.format(i)),
        BatchNormalization(name='batch_{}'.format(i)),
        Rectifier()]
    seq1 = ConvolutionalSequence(branch1, num_channels=num_input,
                                 image_size=image_shape,
                                 weights_init=Orthogonal(), use_bias=False,
                                 name='convSeq_{}'.format(i))
    seq1.initialize()
    out1 = seq1.apply(out)
    i = i + 1

    # Branch 2: 1x1 reduction followed by a 3x3 convolution.
    branch2 = [
        Convolutional(filter_size=(1, 1), num_channels=num_input,
                      num_filters=conv2, image_size=image_shape,
                      border_mode='half', name='conv_{}'.format(i)),
        BatchNormalization(name='batch_{}'.format(i)),
        Rectifier()]
    i = i + 1
    branch2 += [
        Convolutional(filter_size=(3, 3), num_channels=conv2,
                      num_filters=conv3, image_size=image_shape,
                      border_mode='half', name='conv_{}'.format(i)),
        BatchNormalization(name='batch_{}'.format(i)),
        Rectifier()]
    seq2 = ConvolutionalSequence(branch2, num_channels=num_input,
                                 image_size=image_shape,
                                 weights_init=Orthogonal(), use_bias=False,
                                 name='convSeq_{}'.format(i))
    seq2.initialize()
    out2 = seq2.apply(out)
    i = i + 1

    # Branch 3: 1x1 reduction followed by a 5x5 convolution.
    branch3 = [
        Convolutional(filter_size=(1, 1), num_channels=num_input,
                      num_filters=conv4, image_size=image_shape,
                      border_mode='half', name='conv_{}'.format(i)),
        BatchNormalization(name='batch_{}'.format(i)),
        Rectifier()]
    i = i + 1
    branch3 += [
        Convolutional(filter_size=(5, 5), num_channels=conv4,
                      num_filters=conv5, image_size=image_shape,
                      border_mode='half', name='conv_{}'.format(i)),
        BatchNormalization(name='batch_{}'.format(i)),
        Rectifier()]
    seq3 = ConvolutionalSequence(branch3, num_channels=num_input,
                                 image_size=image_shape,
                                 weights_init=Orthogonal(), use_bias=False,
                                 name='convSeq_{}'.format(i))
    seq3.initialize()
    out3 = seq3.apply(out)
    i = i + 1

    # Branch 4: 3x3 max-pool followed by a 1x1 convolution.
    branch4 = [
        MaxPooling((3, 3), step=(1, 1), padding=(1, 1),
                   name='pool_{}'.format(i)),
        Convolutional(filter_size=(1, 1), num_channels=num_input,
                      num_filters=conv6, image_size=image_shape,
                      border_mode='half', name='conv_{}'.format(i)),
        BatchNormalization(name='batch_{}'.format(i)),
        Rectifier()]
    i = i + 1
    seq4 = ConvolutionalSequence(branch4, num_channels=num_input,
                                 image_size=image_shape,
                                 weights_init=Orthogonal(), use_bias=False,
                                 name='convSeq_{}'.format(i))
    seq4.initialize()
    out4 = seq4.apply(out)

    # Merge the four branches along the channel axis.
    return T.concatenate([out1, out2, out3, out4], axis=1)
def test_convolutional_sequence_with_raw_activation():
    """A bare activation brick works inside a ConvolutionalSequence."""
    seq = ConvolutionalSequence([Rectifier()], num_channels=4,
                                image_size=(20, 14))
    # Alternating -1/+1 input: ReLU keeps the +1s and zeroes the -1s.
    raw = numpy.arange(2 * 4 * 20 * 14).reshape((2, 4, 20, 14))
    input_ = ((raw % 2) * 2 - 1).astype(theano.config.floatX)
    expected_ = input_ * (input_ > 0)
    x = theano.tensor.tensor4()
    assert_allclose(seq.apply(x).eval({x: input_}), expected_)
def test_convolutional_sequence_with_raw_activation():
    """Applying a sequence containing only a Rectifier acts like ReLU."""
    seq = ConvolutionalSequence([Rectifier()], num_channels=4,
                                image_size=(20, 14))
    # Build an alternating -1/+1 tensor; rectification should zero the -1s.
    base = numpy.arange(2 * 4 * 20 * 14).reshape((2, 4, 20, 14))
    input_ = ((base % 2) * 2 - 1).astype(theano.config.floatX)
    expected_ = input_ * (input_ > 0)
    x = theano.tensor.tensor4()
    assert_allclose(seq.apply(x).eval({x: input_}), expected_)
def test_pooling_works_in_convolutional_sequence():
    """Average and max pooling bricks chain correctly in a sequence."""
    x = tensor.tensor4('x')
    brick = ConvolutionalSequence(
        [AveragePooling((2, 2), step=(2, 2)),
         MaxPooling((4, 4), step=(2, 2), ignore_border=True)],
        image_size=(16, 32), num_channels=3)
    brick.allocate()
    y = brick.apply(x)
    data = numpy.empty((2, 3, 16, 32), dtype=theano.config.floatX)
    out = y.eval({x: data})
    # (16, 32) -avg 2x2/2-> (8, 16) -max 4x4/2, border ignored-> (3, 7)
    assert out.shape == (2, 3, 3, 7)
def test_fully_layer(): batch_size=2 x = T.tensor4(); y = T.ivector() V = 200 layer_conv = Convolutional(filter_size=(5,5),num_filters=V, name="toto", weights_init=IsotropicGaussian(0.01), biases_init=Constant(0.0)) # try with no bias activation = Rectifier() pool = MaxPooling(pooling_size=(2,2)) convnet = ConvolutionalSequence([layer_conv, activation, pool], num_channels=15, image_size=(10,10), name="conv_section") convnet.push_allocation_config() convnet.initialize() output=convnet.apply(x) batch_size=output.shape[0] output_dim=np.prod(convnet.get_dim('output')) result_conv = output.reshape((batch_size, output_dim)) mlp=MLP(activations=[Rectifier().apply], dims=[output_dim, 10], weights_init=IsotropicGaussian(0.01), biases_init=Constant(0.0)) mlp.initialize() output=mlp.apply(result_conv) cost = T.mean(Softmax().categorical_cross_entropy(y.flatten(), output)) cg = ComputationGraph(cost) W = VariableFilter(roles=[WEIGHT])(cg.variables) B = VariableFilter(roles=[BIAS])(cg.variables) W = W[0]; b = B[0] inputs_fully = VariableFilter(roles=[INPUT], bricks=[Linear])(cg) outputs_fully = VariableFilter(roles=[OUTPUT], bricks=[Linear])(cg) var_input=inputs_fully[0] var_output=outputs_fully[0] [d_W,d_S,d_b] = T.grad(cost, [W, var_output, b]) d_b = d_b.dimshuffle(('x',0)) d_p = T.concatenate([d_W, d_b], axis=0) x_value = 1e3*np.random.ranf((2,15, 10, 10)) f = theano.function([x,y], [var_input, d_S, d_p], allow_input_downcast=True, on_unused_input='ignore') A, B, C= f(x_value, [5, 0]) A = np.concatenate([A, np.ones((2,1))], axis=1) print 'A', A.shape print 'B', B.shape print 'C', C.shape print lin.norm(C - np.dot(np.transpose(A), B), 'fro') return """
def test_convolutional_sequence_use_bias():
    """use_bias=False on the sequence removes every bias parameter."""
    bricks = []
    for _ in range(3):
        bricks.append(Convolutional(filter_size=(1, 1), num_filters=1))
        bricks.append(Rectifier())
    cnn = ConvolutionalSequence(bricks, num_channels=1, image_size=(1, 1),
                                use_bias=False)
    cnn.allocate()
    x = tensor.tensor4()
    y = cnn.apply(x)
    params = ComputationGraph(y).parameters
    # Only the three weight tensors should remain.
    assert len(params) == 3 and all(param.name == 'W' for param in params)
def test_convolutional_sequence_use_bias():
    """use_bias=False on the sequence leaves only the W parameters."""
    stack = [ConvolutionalActivation(activation=Rectifier().apply,
                                     filter_size=(1, 1),
                                     num_filters=1)
             for _ in range(3)]
    cnn = ConvolutionalSequence(stack, num_channels=1, image_size=(1, 1),
                                use_bias=False)
    cnn.allocate()
    x = tensor.tensor4()
    y = cnn.apply(x)
    params = ComputationGraph(y).parameters
    # Three layers, each contributing exactly one weight tensor.
    assert len(params) == 3
    assert all(p.name == "W" for p in params)
def test_convolutional_sequence_with_convolutions_raw_activation():
    """Mixing Convolutional bricks with bare activations shapes correctly."""
    seq = ConvolutionalSequence(
        [Convolutional(filter_size=(3, 3), num_filters=4),
         Rectifier(),
         Convolutional(filter_size=(5, 5), num_filters=3, step=(2, 2)),
         Tanh()],
        num_channels=2, image_size=(21, 39))
    seq.allocate()
    x = theano.tensor.tensor4()
    data = numpy.ones((10, 2, 21, 39), dtype=theano.config.floatX)
    out = seq.apply(x).eval({x: data})
    # (21, 39) -(3x3 valid)-> (19, 37) -(5x5, stride 2)-> (8, 17)
    assert out.shape == (10, 3, 8, 17)
def conv_block(input_img, n_filter, filter_size, input_featuremap_size,
               ordering=''):
    """Two same-size convolutions plus a 1x1 bottleneck, each followed by
    batch normalization and a leaky ReLU; returns the block applied to
    `input_img`.

    The initialization widths follow torch's SpatialConvolution heuristic.
    """
    # found in torch spatialconvolution
    std0 = 2. / (filter_size[0] * filter_size[1]
                 * input_featuremap_size[0]) ** .5
    std1 = 2. / (input_featuremap_size[0]) ** .5
    bricks = [
        Convolutional(filter_size=filter_size, num_filters=n_filter,
                      border_mode='half', name='conv%s_1' % (ordering, ),
                      use_bias=True, weights_init=Uniform(width=std0)),
        BatchNormalization(name='bn%s_1' % (ordering, )),
        LeakyReLU(),
        Convolutional(filter_size=filter_size, num_filters=n_filter,
                      border_mode='half', name='conv%s_2' % (ordering, ),
                      use_bias=True, weights_init=Uniform(width=std0)),
        BatchNormalization(name='bn%s_2' % (ordering, )),
        LeakyReLU(),
        Convolutional(filter_size=(1, 1), num_filters=n_filter,
                      border_mode='valid', name='conv%s_3b' % (ordering, ),
                      use_bias=True, weights_init=Uniform(width=std1)),
        BatchNormalization(name='bn%s_3' % (ordering, )),
        LeakyReLU(),
    ]
    sequence = ConvolutionalSequence(
        bricks,
        num_channels=input_featuremap_size[0],
        image_size=(input_featuremap_size[1], input_featuremap_size[2]),
        biases_init=Uniform(width=.1),
        name='convsequence%s' % (ordering, ))
    sequence.initialize()
    return sequence.apply(input_img)
def main(): initial = numpy.random.normal(0, 0.1, (1, 1, 200, 200)) x = theano.shared(initial) conv_layer = ConvolutionalLayer( Rectifier().apply, (16, 16), 9, (4, 4), 1 ) conv_layer2 = ConvolutionalLayer( Rectifier().apply, (7, 7), 9, (2, 2), 1 ) con_seq = ConvolutionalSequence([conv_layer], 1, image_size=(200, 200), weights_init=IsotropicGaussian(0.1), biases_init=Constant(0.) ) con_seq.initialize() out = con_seq.apply(x) target_out = out[0, 0, 1, 1] grad = theano.grad(target_out - .1 * (x ** 2).sum(), x) updates = {x: x + 5e-1 * grad} #x.set_value(numpy.ones((1, 1, 200, 200))) #print theano.function([], out)() make_step = theano.function([], target_out, updates=updates) for i in xrange(400): out_val = make_step() print i, out_val image = x.get_value()[0][0] image = (image - image.mean()) / image.std() image = numpy.array([image, image, image]).transpose(1, 2, 0) plt.imshow(numpy.cast['uint8'](image * 65. + 128.), interpolation='none') plt.show()
# NOTE(review): chunk fragment of a top-level script building the stem of a
# SqueezeNet-style network (7x7 stem convolution + BN + ReLU + max-pool,
# followed by a chain of Fire modules). The final `conv_layers1 = list([...`
# statement continues beyond this chunk, so the code is left untouched.
x = tensor.tensor4('image_features') y = tensor.lmatrix('targets') num_epochs = 1000 layers = [] ###############FIRST STAGE####################### #Create the convolutions layers layers.append(Convolutional(filter_size=(7,7), step=(2,2), num_filters=96, border_mode='half', name='conv_0')) layers.append(BatchNormalization(name='batch_0')) layers.append(Rectifier()) layers.append(MaxPooling((3,3), step=(2,2), padding=(1,1), name='pool_0')) convSeq = ConvolutionalSequence(layers, num_channels=3, image_size=(220,220), weights_init=Orthogonal(), use_bias=False, name='ConvSeq') convSeq.initialize() out = convSeq.apply(x) #FIRE MODULES out1 = Fire((55,55), 96, 16, 16, 16, out, 10) out2 = Fire((55,55), 128, 16, 16, 16, out1, 25) out3 = Fire((55,55), 128, 32, 32, 32, out2, 300) out31 = MaxPooling((3,3), step=(2,2), padding=(1,1), name='poolLow').apply(out3) out4 = Fire((28,28), 256, 32, 32, 32, out31, 45) out5 = Fire((28,28), 256, 48, 48, 48, out4, 500) out6 = Fire((28,28), 384, 48, 48, 48, out5, 65) out7 = Fire((28,28), 384, 64, 64, 64, out6, 700) out71 = MaxPooling((3,3), step=(2,2), padding=(1,1), name='poolLow2').apply(out7) out8 = Fire((14,14), 512, 64, 64, 64, out71, 85) #LAST LAYERS conv_layers1 = list([Convolutional(filter_size=(1,1), num_filters=2, name='Convx2'), BatchNormalization(name='batch_vx2'), Rectifier(),
# NOTE(review): chunk fragment of a top-level GoogLeNet-style script. It
# starts mid-script (`layers` is populated earlier, outside this view) and
# ends mid-script after the last inception call, so the code is left
# untouched.
layers.append(Convolutional(filter_size=(1,1), num_filters=64, border_mode='half', name='conv_1')) layers.append(BatchNormalization(name='batch_1')) layers.append(Rectifier()) layers.append(MaxPooling((3,3), step=(2,2), padding=(1,1), name='pool_1')) layers.append(Convolutional(filter_size=(3,3), num_filters=192, border_mode='half', name='conv_2')) layers.append(BatchNormalization(name='batch_2')) layers.append(Rectifier()) layers.append(MaxPooling((3,3), step=(2,2), padding=(1,1), name='pool_2')) #Create the sequence conv_sequence = ConvolutionalSequence(layers, num_channels=3, image_size=(160,160), weights_init=Orthogonal(), use_bias=False, name='convSeq') #Initialize the convnet conv_sequence.initialize() #Output the first result out = conv_sequence.apply(x) ###############SECOND STAGE##################### out2 = inception((20,20), 192, 64, 96, 128, 16, 32, 32, out, 10) out3 = inception((20,20), 256, 128, 128, 192, 32, 96, 64, out2, 20) out31 = MaxPooling((2,2), name='poolLow').apply(out3) out4 = inception((10,10), 480, 192, 96, 208, 16, 48, 64, out31, 30) out5 = inception((10,10), 512, 160, 112, 224, 24, 64, 64, out4, 40) out6 = inception((10,10), 512, 128, 128, 256, 24, 64, 64, out5, 50) out7 = inception((10,10), 512, 112, 144, 288, 32, 64, 64, out6, 60) out8 = inception((10,10), 528, 256, 160, 320, 32, 128, 128, out7, 70) out81 = MaxPooling((2,2), name='poolLow1').apply(out8) out9 = inception((5,5), 832, 256, 160, 320, 32, 128, 128, out81, 80) out10 = inception((5,5), 832, 384, 192, 384, 48, 128, 128, out9, 90)
# NOTE(review): chunk fragment — looks like doctest/tutorial material
# (convnet + MLP on 32x32 RGB input). It references `filter_sizes` and
# `num_filters` defined outside this view, contains bare expression lines
# that resemble interpreter output, and ends on a mid-script import, so the
# code is left untouched.
pooling_sizes = [(2, 2)] * 2 activation = Logistic().apply conv_layers = [ b.ConvolutionalLayer(activation, filter_size, num_filters_, pooling_size, num_channels=3) for filter_size, num_filters_, pooling_size in zip(filter_sizes, num_filters, pooling_sizes) ] convnet = ConvolutionalSequence(conv_layers, num_channels=3, image_size=(32, 32), weights_init=Uniform(0, 0.2), biases_init=Constant(0.)) convnet.initialize() conv_features = Flattener().apply(convnet.apply(X)) # MLP mlp = MLP(activations=[Logistic(name='sigmoid_0'), Softmax(name='softmax_1')], dims=[ 256, 256, 256, 2], weights_init=IsotropicGaussian(0.01), biases_init=Constant(0)) [child.name for child in mlp.children] ['linear_0', 'sigmoid_0', 'linear_1', 'softmax_1'] Y = mlp.apply(conv_features) mlp.initialize() # Setting up the cost function from blocks.bricks.cost import CategoricalCrossEntropy
def inception(image_shape, num_input, conv1, conv2, conv3, conv4, conv5,
              conv6, out, i):
    """Inception block: four parallel convolutional branches concatenated
    along the channel axis.

    `i` is a running index used only to generate unique brick names; its
    increments are interleaved with branch construction so each brick gets
    a distinct suffix.
    """
    # --- Branch 1: 1x1 convolution -------------------------------------
    b1 = []
    b1.append(Convolutional(filter_size=(1, 1), num_channels=num_input,
                            num_filters=conv1, image_size=image_shape,
                            border_mode='half', name='conv_{}'.format(i)))
    b1.append(BatchNormalization(name='batch_{}'.format(i)))
    b1.append(Rectifier())
    seq1 = ConvolutionalSequence(b1, num_channels=num_input,
                                 image_size=image_shape,
                                 weights_init=Orthogonal(),
                                 use_bias=False,
                                 name='convSeq_{}'.format(i))
    seq1.initialize()
    out1 = seq1.apply(out)
    i = i + 1
    # --- Branch 2: 1x1 reduction, then 3x3 convolution -----------------
    b2 = []
    b2.append(Convolutional(filter_size=(1, 1), num_channels=num_input,
                            num_filters=conv2, image_size=image_shape,
                            border_mode='half', name='conv_{}'.format(i)))
    b2.append(BatchNormalization(name='batch_{}'.format(i)))
    b2.append(Rectifier())
    i = i + 1
    b2.append(Convolutional(filter_size=(3, 3), num_channels=conv2,
                            num_filters=conv3, image_size=image_shape,
                            border_mode='half', name='conv_{}'.format(i)))
    b2.append(BatchNormalization(name='batch_{}'.format(i)))
    b2.append(Rectifier())
    seq2 = ConvolutionalSequence(b2, num_channels=num_input,
                                 image_size=image_shape,
                                 weights_init=Orthogonal(),
                                 use_bias=False,
                                 name='convSeq_{}'.format(i))
    seq2.initialize()
    out2 = seq2.apply(out)
    i = i + 1
    # --- Branch 3: 1x1 reduction, then 5x5 convolution -----------------
    b3 = []
    b3.append(Convolutional(filter_size=(1, 1), num_channels=num_input,
                            num_filters=conv4, image_size=image_shape,
                            border_mode='half', name='conv_{}'.format(i)))
    b3.append(BatchNormalization(name='batch_{}'.format(i)))
    b3.append(Rectifier())
    i = i + 1
    b3.append(Convolutional(filter_size=(5, 5), num_channels=conv4,
                            num_filters=conv5, image_size=image_shape,
                            border_mode='half', name='conv_{}'.format(i)))
    b3.append(BatchNormalization(name='batch_{}'.format(i)))
    b3.append(Rectifier())
    seq3 = ConvolutionalSequence(b3, num_channels=num_input,
                                 image_size=image_shape,
                                 weights_init=Orthogonal(),
                                 use_bias=False,
                                 name='convSeq_{}'.format(i))
    seq3.initialize()
    out3 = seq3.apply(out)
    i = i + 1
    # --- Branch 4: 3x3 max-pool, then 1x1 convolution ------------------
    b4 = []
    b4.append(MaxPooling((3, 3), step=(1, 1), padding=(1, 1),
                         name='pool_{}'.format(i)))
    b4.append(Convolutional(filter_size=(1, 1), num_channels=num_input,
                            num_filters=conv6, image_size=image_shape,
                            border_mode='half', name='conv_{}'.format(i)))
    b4.append(BatchNormalization(name='batch_{}'.format(i)))
    b4.append(Rectifier())
    i = i + 1
    seq4 = ConvolutionalSequence(b4, num_channels=num_input,
                                 image_size=image_shape,
                                 weights_init=Orthogonal(),
                                 use_bias=False,
                                 name='convSeq_{}'.format(i))
    seq4.initialize()
    out4 = seq4.apply(out)
    # Merge
    return T.concatenate([out1, out2, out3, out4], axis=1)
def main():
    """Train a small VGG-style convnet on CIFAR-10 with Blocks/Fuel."""
    # ConvOp requires input be a 4D tensor.
    x = tensor.tensor4("features")
    y = tensor.ivector("targets")

    # Convolutional layers ("VGGNet"-style stack).
    conv_layers = [
        ConvolutionalActivation(Rectifier().apply, (3, 3), 64,
                                border_mode='full', name='l1'),
        ConvolutionalLayer(Rectifier().apply, (3, 3), 64, (2, 2),
                           border_mode='full', name='l2'),
        ConvolutionalActivation(Rectifier().apply, (3, 3), 128,
                                border_mode='full', name='l3'),
        ConvolutionalLayer(Rectifier().apply, (3, 3), 128, (2, 2),
                           border_mode='full', name='l4'),
        ConvolutionalActivation(Rectifier().apply, (3, 3), 256,
                                border_mode='full', name='l5'),
        ConvolutionalLayer(Rectifier().apply, (3, 3), 256, (2, 2),
                           border_mode='full', name='l6'),
    ]
    convnet = ConvolutionalSequence(conv_layers, num_channels=3,
                                    image_size=(32, 32),
                                    weights_init=IsotropicGaussian(0.1),
                                    biases_init=Constant(0))
    convnet.initialize()
    output_dim = np.prod(convnet.get_dim('output'))

    # Fully connected layers on top of the flattened conv features.
    conv_features = convnet.apply(x)
    features = Flattener().apply(conv_features)
    mlp = MLP(activations=[Rectifier()] * 2 + [None],
              dims=[output_dim, 256, 256, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0))
    mlp.initialize()
    y_hat = mlp.apply(features)

    # Numerically stable softmax cross-entropy plus L2 regularization.
    cost = Softmax().categorical_cross_entropy(y, y_hat)
    error_rate = MisclassificationRate().apply(y, y_hat)
    cg = ComputationGraph(cost)
    weights = VariableFilter(roles=[FILTER, WEIGHT])(cg.variables)
    l2_regularization = 0.005 * sum((W ** 2).sum() for W in weights)
    cost = cost + l2_regularization
    cost.name = 'cost_with_regularization'

    # Print sizes to check.
    print("Representation sizes:")
    for layer in convnet.layers:
        print(layer.get_dim('input_'))

    # Data streams.
    train = CIFAR10("train")
    test = CIFAR10("test")
    train_stream = DataStream.default_stream(
        dataset=train,
        iteration_scheme=SequentialScheme(train.num_examples,
                                          batch_size=128))
    test_stream = DataStream.default_stream(
        dataset=test,
        iteration_scheme=SequentialScheme(test.num_examples,
                                          batch_size=1024))

    # Training loop.
    algorithm = GradientDescent(cost=cost, params=cg.parameters,
                                step_rule=Adam(learning_rate=0.0005))
    main_loop = MainLoop(
        model=Model(cost),
        data_stream=train_stream,
        algorithm=algorithm,
        extensions=[
            TrainingDataMonitoring([cost, error_rate], prefix='train',
                                   after_epoch=True),
            DataStreamMonitoring([cost, error_rate], test_stream,
                                 prefix='test'),
            ExperimentSaver(dest_directory='...', src_directory='.'),
            Printing(),
            ProgressBar(),
        ])
    main_loop.run()
# NOTE(review): chunk fragment starting inside a Convolutional(...) call —
# part of a two-stream convnet whose outputs are flattened and concatenated
# before a shared MLP. The enclosing statement begins outside this view, so
# the code is left untouched. NOTE(review): only `conv_sequence.initialize()`
# appears here; `conv_sequence2` does not seem to be initialized in this
# fragment — verify against the full file.
step=conv_step, border_mode=border_mode, name='conv_{}_1'.format(i))) conv_layers2.append(conv_activation[i]) conv_layers2.append(MaxPooling(pooling_size, name='pool_{}_1'.format(i))) # --------------------------------------------------------------- # Building both sequences and merge them by tensor.concatenate # --------------------------------------------------------------- conv_sequence = ConvolutionalSequence(conv_layers, num_channels, image_size=image_size,weights_init=Uniform(width=0.2), biases_init=Constant(0.), name='conv_sequence_0') conv_sequence2 = ConvolutionalSequence(conv_layers2, num_channels, image_size=image_size,weights_init=Uniform(width=0.2), biases_init=Constant(0.), name='conv_sequence_1') conv_sequence.initialize() conv_out1 = Flattener(name='flattener_0').apply(conv_sequence.apply(x)) conv_out2 = Flattener(name='flattener_1').apply(conv_sequence2.apply(x2)) conv_out = tensor.concatenate([conv_out1,conv_out2],axis=1) top_mlp_dims = [2*numpy.prod(conv_sequence.get_dim('output'))] + mlp_hiddens + [output_size] top_mlp = MLP(mlp_activation, top_mlp_dims,weights_init=GlorotInitialization(),biases_init=Constant(0.)) top_mlp.initialize() predict = top_mlp.apply(conv_out) # --------------------------------------------------------------- # Building computational graph # --------------------------------------------------------------- cost = CategoricalCrossEntropy().apply(y.flatten(), predict).copy(name='cost') error = MisclassificationRate().apply(y.flatten(), predict)
def build_and_run(label, config):
    """Build the convnet described by `config`, then train it.

    `config` supplies architecture and optimization settings; `label`
    names the checkpoint file and the live Bokeh plot.
    """
    # ----- Unpack configuration ----------------------------------------
    num_epochs = config['num_epochs']
    num_batches = config['num_batches']
    num_channels = config['num_channels']
    image_shape = config['image_shape']
    filter_size = config['filter_size']
    num_filter = config['num_filter']
    pooling_sizes = config['pooling_sizes']
    mlp_hiddens = config['mlp_hiddens']
    output_size = config['output_size']
    batch_size = config['batch_size']
    activation = config['activation']
    mlp_activation = config['mlp_activation']
    lambda_l1 = 0.000025
    lambda_l2 = 0.000025

    print("Building model")
    # ----- Symbolic inputs ---------------------------------------------
    x = T.tensor4('image_features')
    y = T.lmatrix('targets')

    # ----- Convolutional part: conv / activation / pool interleaved ----
    conv_parameters = zip(filter_size, num_filter)
    conv_layers = list(interleave([
        (Convolutional(filter_size=fsize, num_filters=nfilter,
                       name='conv_{}'.format(n))
         for n, (fsize, nfilter) in enumerate(conv_parameters)),
        (activation),
        (MaxPooling(size, name='pool_{}'.format(n))
         for n, size in enumerate(pooling_sizes))]))
    conv_sequence = ConvolutionalSequence(conv_layers, num_channels,
                                          image_size=image_shape,
                                          weights_init=Uniform(width=0.2),
                                          biases_init=Constant(0.))
    conv_sequence.initialize()

    # ----- MLP on top of the flattened conv features --------------------
    top_mlp_dims = ([np.prod(conv_sequence.get_dim('output'))]
                    + mlp_hiddens + [output_size])
    out = Flattener().apply(conv_sequence.apply(x))
    mlp = MLP(mlp_activation, top_mlp_dims,
              weights_init=Uniform(0, 0.2), biases_init=Constant(0.))
    mlp.initialize()
    predict = mlp.apply(out)

    # ----- Cost, error rates and L1/L2 regularization -------------------
    cost = CategoricalCrossEntropy().apply(y.flatten(),
                                           predict).copy(name='cost')
    error = MisclassificationRate().apply(y.flatten(), predict)
    # Two copies so the same quantity can appear in two separate plots.
    error_rate = error.copy(name='error_rate')
    error_rate2 = error.copy(name='error_rate2')
    cg = ComputationGraph([cost])
    weights = VariableFilter(roles=[WEIGHT])(cg.variables)
    biases = VariableFilter(roles=[BIAS])(cg.variables)
    l2_penalty = T.sum([lambda_l2 * (W ** 2).sum()
                        for W in weights + biases])
    l2_penalty.name = 'l2_penalty'
    l1_penalty = T.sum([lambda_l1 * T.abs_(z).sum()
                        for z in weights + biases])
    l1_penalty.name = 'l1_penalty'
    costreg = cost + l2_penalty + l1_penalty
    costreg.name = 'costreg'

    # ----- Algorithm and data -------------------------------------------
    algorithm = GradientDescent(cost=costreg, parameters=cg.parameters,
                                step_rule=Adam())
    istest = 'test' in config.keys()
    train_stream, valid_stream, test_stream = get_stream(
        batch_size, image_shape, test=istest)

    # ----- Extensions ----------------------------------------------------
    checkpoint = Checkpoint('models/best_' + label + '.tar')
    checkpoint.add_condition(
        ['after_epoch'],
        predicate=OnLogRecord('valid_error_rate_best_so_far'))
    plot = Plot(label,
                channels=[['train_error_rate', 'valid_error_rate'],
                          ['valid_cost', 'valid_error_rate2']],
                server_url="http://hades.calculquebec.ca:5042")
    grad_norm = aggregation.mean(algorithm.total_gradient_norm)
    grad_norm.name = 'grad_norm'
    extensions = [
        Timing(),
        FinishAfter(after_n_epochs=num_epochs,
                    after_n_batches=num_batches),
        DataStreamMonitoring([cost, error_rate, error_rate2],
                             valid_stream, prefix="valid"),
        TrainingDataMonitoring([costreg, error_rate, error_rate2,
                                grad_norm, l2_penalty, l1_penalty],
                               prefix="train", after_epoch=True),
        plot,
        ProgressBar(),
        Printing(),
        TrackTheBest('valid_error_rate', min),  # keep best
        checkpoint,                             # save best
        FinishIfNoImprovementAfter('valid_error_rate_best_so_far',
                                   epochs=4)]   # early stopping
    model = Model(cost)
    main_loop = MainLoop(algorithm, data_stream=train_stream, model=model,
                         extensions=extensions)
    main_loop.run()
# NOTE(review): chunk fragment of a top-level script — it reads
# `conv_parameters`, `conv_layers`, `x`, `y`, `conv_step`, `border_mode`
# and `conv_activation` defined outside this view, and the hard-coded
# `linear_inputs_index = [-10,-8,6]` depends on the full graph's variable
# ordering, so the code is left untouched.
for i, (filter_size,num_filter,pooling_size) in enumerate(conv_parameters): conv_layers.append(SpatialBatchNormalization(name='sbn_{}'.format(i))) conv_layers.append( Convolutional( filter_size=filter_size, num_filters=num_filter, step=conv_step, border_mode=border_mode, name='conv_{}'.format(i))) conv_layers.append(conv_activation[i]) conv_layers.append(MaxPooling(pooling_size, name='pool_{}'.format(i))) conv_sequence = ConvolutionalSequence(conv_layers, num_channels, image_size=image_size,weights_init=Uniform(width=0.2), biases_init=Constant(0.)) conv_sequence.initialize() out = Flattener().apply(conv_sequence.apply(x)) top_mlp_dims = [numpy.prod(conv_sequence.get_dim('output'))] + mlp_hiddens + [output_size] top_mlp = MLP(mlp_activation, top_mlp_dims,weights_init=GlorotInitialization(),biases_init=Constant(0.)) top_mlp.initialize() predict = top_mlp.apply(out) cost = CategoricalCrossEntropy().apply(y.flatten(), predict).copy(name='cost') error = MisclassificationRate().apply(y.flatten(), predict) error_rate = error.copy(name='error_rate') error_rate2 = error.copy(name='error_rate2') cg = ComputationGraph([cost, error_rate]) inputs = VariableFilter(roles=[INPUT])(cg.variables) linear_inputs_index = [-10,-8,6] linear_inputs = list(itemgetter(*linear_inputs_index)(inputs))
def run_experiment():
    """Numerically cross-check per-example squared gradient norms.

    Builds a tiny one-layer convnet (all weights forced to 1, constant
    input), then compares:
      * method_00: per-example sum of squared gradient norms computed by the
        project helper ``get_sum_square_norm_gradients_conv_transformations``;
      * method_02: the squared norm of the gradients of the *summed*
        mini-batch cost (here restricted to BIAS parameters),
    both on the full mini-batch and example-by-example, printing the
    differences and ratios. Purely diagnostic; nothing is trained.
    """
    np.random.seed(42)

    X = tensor.tensor4('features')

    nbr_channels = 3
    image_shape = (5, 5)

    conv_layers = [
        ConvolutionalLayer(
            filter_size=(2, 2),
            num_filters=10,
            activation=Rectifier().apply,
            border_mode='valid',
            pooling_size=(1, 1),
            weights_init=Uniform(width=0.1),
            #biases_init=Uniform(width=0.01),
            biases_init=Constant(0.0),
            name='conv0')
    ]

    conv_sequence = ConvolutionalSequence(conv_layers,
                                          num_channels=nbr_channels,
                                          image_size=image_shape)
    #conv_sequence.push_allocation_config()
    conv_sequence.initialize()

    flattener = Flattener()
    conv_output = conv_sequence.apply(X)
    y_hat = flattener.apply(conv_output)
    # Whatever. Not important since we're not going to actually train anything.
    cost = tensor.sqr(y_hat).sum()

    #L_grads_method_02 = [tensor.grad(cost, v) for v in VariableFilter(roles=[FILTER, BIAS])(ComputationGraph([y_hat]).variables)]
    # Only BIAS gradients are compared here (FILTER variant kept above for reference).
    L_grads_method_02 = [
        tensor.grad(cost, v) for v in VariableFilter(
            roles=[BIAS])(ComputationGraph([y_hat]).variables)
    ]

    # works on the sum of the gradients in a mini-batch
    sum_square_norm_gradients_method_02 = sum(
        [tensor.sqr(g).sum() for g in L_grads_method_02])

    D_by_layer = get_conv_layers_transformation_roles(
        ComputationGraph(conv_output))
    individual_sum_square_norm_gradients_method_00 = get_sum_square_norm_gradients_conv_transformations(
        D_by_layer, cost)

    # why does this thing depend on N again ?
    # I don't think I've used a cost that divides by N.

    N = 2
    Xtrain = np.random.randn(N, nbr_channels, image_shape[0],
                             image_shape[1]).astype(np.float32)
    #Xtrain[1:,:,:,:] = 0.0
    # Constant input makes the expected gradients easy to reason about.
    Xtrain[:, :, :, :] = 1.0

    # Force every convolution filter weight to 1.0 for the same reason.
    convolution_filter_variable = VariableFilter(roles=[FILTER])(
        ComputationGraph([y_hat]).variables)[0]
    convolution_filter_variable_value = convolution_filter_variable.get_value()
    convolution_filter_variable_value[:, :, :, :] = 1.0
    #convolution_filter_variable_value[0,0,:,:] = 1.0
    convolution_filter_variable.set_value(convolution_filter_variable_value)

    f = theano.function([X], [
        cost, individual_sum_square_norm_gradients_method_00,
        sum_square_norm_gradients_method_02
    ])

    # Full mini-batch in one call...
    [c, v0, gs2] = f(Xtrain)

    #print "[c, v0, gs2]"

    # ...then one example at a time, accumulating the same quantities.
    L_c, L_v0, L_gs2 = ([], [], [])
    for n in range(N):
        [nc, nv0, ngs2] = f(Xtrain[n, :, :, :].reshape(
            (1, Xtrain.shape[1], Xtrain.shape[2], Xtrain.shape[3])))
        L_c.append(nc)
        L_v0.append(nv0)
        L_gs2.append(ngs2)

    print "Cost for whole mini-batch in single shot : %f." % c
    print "Cost for whole mini-batch accumulated : %f." % sum(L_c)
    print ""
    print "Square-norm of all gradients for each data point in single shot :"
    print v0.reshape((1, -1))
    print "Square-norm of all gradients for each data point iteratively :"
    print np.array(L_gs2).reshape((1, -1))
    print ""
    print "Difference max abs : %f." % np.max(np.abs(v0 - np.array(L_gs2)))
    print ""
    print "Ratios : "
    print np.array(L_gs2).reshape((1, -1)) / v0.reshape((1, -1))
def create_network(inputs=None, batch=batch_size):
    """Build the PixelCNN graph and return ``(cost, cost_bits_dim)``.

    Stacks masked convolutions (mask 'A' first, then 'B' layers) with
    Rectifier activations, followed by two 1x1 'B' convolutions and a final
    output layer. Depending on the module-level ``MODE``:
      * '256ary': softmax over 256 intensity levels per channel, categorical
        cross-entropy cost (in nats) plus a bits/dim variant;
      * otherwise: per-pixel Bernoulli with binary cross-entropy.

    NOTE(review): relies on module-level globals (``first_layer``,
    ``second_layer``, ``third_layer``, ``n_layer``, ``h``, ``n_channel``,
    ``img_dim``, ``dataset``, ``MODE``) — verify against the defining module.
    """
    if inputs is None:
        inputs = T.tensor4('features')
    x = T.cast(inputs, 'float32')
    # Binarized MNIST is already in [0, 1]; other datasets come in as 0-255.
    x = x / 255. if dataset != 'binarized_mnist' else x

    # PixelCNN architecture
    conv_list = [
        ConvolutionalNoFlip(*first_layer, mask='A', name='0'),
        Rectifier()
    ]
    for i in range(n_layer):
        conv_list.extend([
            ConvolutionalNoFlip(*second_layer, mask='B', name=str(i + 1)),
            Rectifier()
        ])

    conv_list.extend([
        ConvolutionalNoFlip((1, 1), h * n_channel, mask='B',
                            name=str(n_layer + 1)),
        Rectifier()
    ])
    conv_list.extend([
        ConvolutionalNoFlip((1, 1), h * n_channel, mask='B',
                            name=str(n_layer + 2)),
        Rectifier()
    ])
    conv_list.extend(
        [ConvolutionalNoFlip(*third_layer, mask='B', name=str(n_layer + 3))])

    sequence = ConvolutionalSequence(conv_list,
                                     num_channels=n_channel,
                                     batch_size=batch,
                                     image_size=(img_dim, img_dim),
                                     border_mode='half',
                                     weights_init=IsotropicGaussian(std=0.05, mean=0),
                                     biases_init=Constant(0.02),
                                     tied_biases=False)
    sequence.initialize()
    x = sequence.apply(x)

    if MODE == '256ary':
        # Reshape so the 256 intensity levels become the trailing axis, then
        # flatten to (pixels, 256) for the softmax.
        x = x.reshape(
            (-1, 256, n_channel, img_dim, img_dim)).dimshuffle(0, 2, 3, 4, 1)
        x = x.reshape((-1, 256))
        x_hat = Softmax().apply(x)
        inp = T.cast(inputs, 'int64').flatten()
        # Mean-per-pixel cross-entropy rescaled to a per-image total (nats).
        cost = CategoricalCrossEntropy().apply(inp, x_hat) * img_dim * img_dim
        cost_bits_dim = categorical_crossentropy(log_softmax(x), inp)
    else:
        x_hat = Logistic().apply(x)
        cost = BinaryCrossEntropy().apply(inputs, x_hat) * img_dim * img_dim
        #cost = T.nnet.binary_crossentropy(x_hat, inputs)
        #cost = cost.sum() / inputs.shape[0]
        # Bits/dim: binary cross-entropy expressed in base-2 logarithms.
        cost_bits_dim = -(inputs * T.log2(x_hat) +
                          (1.0 - inputs) * T.log2(1.0 - x_hat)).mean()

    cost_bits_dim.name = "nnl_bits_dim"
    cost.name = 'loglikelihood_nat'
    return cost, cost_bits_dim
# pooling_size, num_channels, conv_step=(1, 1), pooling_step=None, batch_size=None, # image_size=None, border_mode='valid', tied_biases=False, **kwargs) conv_layers = [ ConvolutionalLayer(Rectifier().apply, filter_size_1, num_filters_1, pooling_size_1, name='conv_1'), ConvolutionalLayer(Rectifier().apply, filter_size_2, num_filters_2, pooling_size_2, name='conv_2')] convnet = ConvolutionalSequence(conv_layers, num_channels=num_channels, image_size=(num_rows, num_cols), weights_init=IsotropicGaussian(0.1), biases_init=Constant(0)) # Apply ( aka instantiate this part of the computational graph ) features = Flattener().apply(convnet.apply(x)) # features = Feedforward((convnet.apply(x))) # features = convnet.apply(x) for i, l in enumerate(convnet.layers): print("Layer {0}: {1} inputs and {2} outputs.".format(i, l.get_dim('input_'), l.get_dim('output'))) # Initialize ( aka fill the theano variables representing parameters with values ) print("Initializing the convnet..") convnet.initialize() # Get the dimensionality of the last layer conv_out_dim = np.prod(convnet.layers[-1].get_dim('output')) print("Output dimensionality of the ConvNet: {0}".format(conv_out_dim)) # Define fully connected layers
def run_experiment(): np.random.seed(42) #X = tensor.matrix('features') X = tensor.tensor4('features') y = tensor.matrix('targets') nbr_channels = 3 image_shape = (30, 30) conv_layers = [ ConvolutionalLayer( filter_size=(4,4), num_filters=10, activation=Rectifier().apply, border_mode='full', pooling_size=(1,1), weights_init=Uniform(width=0.1), biases_init=Constant(0.0), name='conv0'), ConvolutionalLayer( filter_size=(3,3), num_filters=14, activation=Rectifier().apply, border_mode='full', pooling_size=(1,1), weights_init=Uniform(width=0.1), biases_init=Constant(0.0), name='conv1')] conv_sequence = ConvolutionalSequence( conv_layers, num_channels=nbr_channels, image_size=image_shape) #conv_sequence.push_allocation_config() conv_sequence.initialize() conv_output_dim = np.prod(conv_sequence.get_dim('output')) #conv_output_dim = 25*25 flattener = Flattener() mlp = MLP( activations=[Rectifier(), Rectifier(), Softmax()], dims=[conv_output_dim, 50, 50, 10], weights_init=IsotropicGaussian(std=0.1), biases_init=IsotropicGaussian(std=0.01)) mlp.initialize() conv_output = conv_sequence.apply(X) y_hat = mlp.apply(flattener.apply(conv_output)) cost = CategoricalCrossEntropy().apply(y, y_hat) #cost = CategoricalCrossEntropy().apply(y_hat, y) #cost = BinaryCrossEntropy().apply(y.flatten(), y_hat.flatten()) cg = ComputationGraph([y_hat]) """ print "--- INPUT ---" for v in VariableFilter(bricks=mlp.linear_transformations, roles=[INPUT])(cg.variables): print v.tag.annotations[0].name print "--- OUTPUT ---" #print(VariableFilter(bricks=mlp.linear_transformations, roles=[OUTPUT])(cg.variables)) for v in VariableFilter(bricks=mlp.linear_transformations, roles=[OUTPUT])(cg.variables): print v.tag.annotations[0].name print "--- WEIGHT ---" #print(VariableFilter(bricks=mlp.linear_transformations, roles=[WEIGHT])(cg.variables)) for v in VariableFilter(bricks=mlp.linear_transformations, roles=[WEIGHT])(cg.variables): print v.tag.annotations[0].name print "--- BIAS ---" 
#print(VariableFilter(bricks=mlp.linear_transformations, roles=[BIAS])(cg.variables)) for v in VariableFilter(bricks=mlp.linear_transformations, roles=[BIAS])(cg.variables): print v.tag.annotations[0].name """ # check out .tag on the variables to see which layer they belong to print "----------------------------" D_by_layer = get_linear_transformation_roles(mlp, cg) # returns a vector with one entry for each in the mini-batch individual_sum_square_norm_gradients_method_00 = get_sum_square_norm_gradients_linear_transformations(D_by_layer, cost) #import pprint #pp = pprint.PrettyPrinter(indent=4) #pp.pprint(get_conv_layers_transformation_roles(ComputationGraph(conv_output)).items()) D_by_layer = get_conv_layers_transformation_roles(ComputationGraph(conv_output)) individual_sum_square_norm_gradients_method_00 += get_sum_square_norm_gradients_conv_transformations(D_by_layer, cost) print "There are %d entries in cg.parameters." % len(cg.parameters) L_grads_method_01 = [tensor.grad(cost, p) for p in cg.parameters] L_grads_method_02 = [tensor.grad(cost, v) for v in VariableFilter(roles=[WEIGHT, BIAS])(cg.variables)] # works on the sum of the gradients in a mini-batch sum_square_norm_gradients_method_01 = sum([tensor.sqr(g).sum() for g in L_grads_method_01]) sum_square_norm_gradients_method_02 = sum([tensor.sqr(g).sum() for g in L_grads_method_02]) N = 8 Xtrain = np.random.randn(N, nbr_channels, image_shape[0], image_shape[1]).astype(np.float32) # Option 1. ytrain = np.zeros((N, 10), dtype=np.float32) for n in range(N): label = np.random.randint(low=0, high=10) ytrain[n, label] = 1.0 # Option 2, just to debug situations with NaN. 
#ytrain = np.random.rand(N, 10).astype(np.float32) #for n in range(N): # ytrain[n,:] = ytrain[n,:] / ytrain[n,:].sum() f = theano.function([X,y], [cost, individual_sum_square_norm_gradients_method_00, sum_square_norm_gradients_method_01, sum_square_norm_gradients_method_02]) [c, v0, gs1, gs2] = f(Xtrain, ytrain) #print "[c, v0, gs1, gs2]" L_c, L_v0, L_gs1, L_gs2 = ([], [], [], []) for n in range(N): [nc, nv0, ngs1, ngs2] = f(Xtrain[n,:].reshape((1,Xtrain.shape[1],Xtrain.shape[2], Xtrain.shape[3])), ytrain[n,:].reshape((1,10))) L_c.append(nc) L_v0.append(nv0) L_gs1.append(ngs1) L_gs2.append(ngs2) print "Cost for whole mini-batch in single shot : %f." % c print "Cost for whole mini-batch accumulated : %f." % sum(L_c) print "" print "Square-norm of all gradients for each data point in single shot :" print v0.reshape((1,-1)) print "Square-norm of all gradients for each data point iteratively :" print np.array(L_gs1).reshape((1,-1)) print "Square-norm of all gradients for each data point iteratively :" print np.array(L_gs2).reshape((1,-1)) print "" print "Difference max abs : %f." % np.max(np.abs(v0 - np.array(L_gs1))) print "Difference max abs : %f." % np.max(np.abs(v0 - np.array(L_gs2))) print "" print "Ratios : " print np.array(L_gs1).reshape((1,-1)) / v0.reshape((1,-1))
def _fire_conv_branch(inp, filter_size, num_channels, num_filters, image_shape, i):
    """Build one conv -> batch-norm -> ReLU branch of a Fire module.

    Constructs and initializes a bias-free, orthogonally-initialized
    ``ConvolutionalSequence`` with 'half' border mode, using the running
    index ``i`` for the brick names (``conv_i``, ``batch_i``, ``convSeq_i``)
    exactly as the original inline code did, and returns the branch output
    applied to ``inp``.
    """
    layers = [
        Convolutional(filter_size=filter_size, num_channels=num_channels,
                      num_filters=num_filters, image_size=image_shape,
                      border_mode='half', name='conv_{}'.format(i)),
        BatchNormalization(name='batch_{}'.format(i)),
        Rectifier(),
    ]
    seq = ConvolutionalSequence(layers, num_channels=num_channels,
                                image_size=image_shape,
                                weights_init=Orthogonal(),
                                use_bias=False,
                                name='convSeq_{}'.format(i))
    seq.initialize()
    return seq.apply(inp)


def Fire(image_shape, num_input, conv1, conv2, conv3, out, i):
    """SqueezeNet-style Fire module with batch normalization.

    Squeeze stage: four parallel 1x1 conv branches (``conv1`` filters each)
    applied to ``out`` and concatenated along the channel axis. Expand
    stage: four 1x1 branches (``conv2`` filters) and four 3x3 branches
    (``conv3`` filters) applied to the squeezed tensor; their outputs are
    concatenated along the channel axis and returned.

    Parameters
    ----------
    image_shape : tuple
        Spatial size fed to every ConvolutionalSequence.
    num_input : int
        Number of channels of ``out``.
    conv1, conv2, conv3 : int
        Filter counts for squeeze 1x1 / expand 1x1 / expand 3x3 branches.
    out : theano variable
        Input feature map.
    i : int
        Starting index for brick names; twelve consecutive indices are
        consumed, in the same order as the original unrolled code.
    """
    # Squeeze: 4 x (1x1 conv) on the module input.
    squeeze_outs = []
    for _ in range(4):
        squeeze_outs.append(
            _fire_conv_branch(out, (1, 1), num_input, conv1, image_shape, i))
        i = i + 1
    squeezed = T.concatenate(squeeze_outs, axis=1)

    # The squeeze concat yields conv1 channels per branch, 4 branches.
    num_input2 = conv1 * 4

    # Expand: 4 x (1x1 conv) ...
    expand1_outs = []
    for _ in range(4):
        expand1_outs.append(
            _fire_conv_branch(squeezed, (1, 1), num_input2, conv2, image_shape, i))
        i = i + 1

    # ... and 4 x (3x3 conv), all on the squeezed tensor.
    expand3_outs = []
    for _ in range(4):
        expand3_outs.append(
            _fire_conv_branch(squeezed, (3, 3), num_input2, conv3, image_shape, i))
        i = i + 1

    # Merge every expand branch along the channel axis.
    return T.concatenate(expand1_outs + expand3_outs, axis=1)
def test_convolutional_layer(): batch_size=2 x = T.tensor4(); y = T.ivector() V = 200 layer_conv = Convolutional(filter_size=(5,5),num_filters=V, name="toto", weights_init=IsotropicGaussian(0.01), biases_init=Constant(0.0)) # try with no bias activation = Rectifier() pool = MaxPooling(pooling_size=(2,2)) convnet = ConvolutionalSequence([layer_conv, activation, pool], num_channels=15, image_size=(10,10), name="conv_section") convnet.push_allocation_config() convnet.initialize() output=convnet.apply(x) batch_size=output.shape[0] output_dim=np.prod(convnet.get_dim('output')) result_conv = output.reshape((batch_size, output_dim)) mlp=MLP(activations=[Rectifier().apply], dims=[output_dim, 10], weights_init=IsotropicGaussian(0.01), biases_init=Constant(0.0)) mlp.initialize() output=mlp.apply(result_conv) cost = T.mean(Softmax().categorical_cross_entropy(y.flatten(), output)) cg = ComputationGraph(cost) W = VariableFilter(roles=[WEIGHT])(cg.variables) B = VariableFilter(roles=[BIAS])(cg.variables) W = W[-1]; b = B[-1] print W.shape.eval() print b.shape.eval() import pdb pdb.set_trace() inputs_conv = VariableFilter(roles=[INPUT], bricks=[Convolutional])(cg) outputs_conv = VariableFilter(roles=[OUTPUT], bricks=[Convolutional])(cg) var_input=inputs_conv[0] var_output=outputs_conv[0] [d_W,d_S,d_b] = T.grad(cost, [W, var_output, b]) import pdb pdb.set_trace() w_shape = W.shape.eval() d_W = d_W.reshape((w_shape[0], w_shape[1]*w_shape[2]*w_shape[3])) d_b = T.zeros((w_shape[0],6*6)) #d_b = d_b.reshape((w_shape[0], 8*8)) d_p = T.concatenate([d_W, d_b], axis=1) d_S = d_S.dimshuffle((1, 0, 2, 3)).reshape((w_shape[0], batch_size, 6*6)).reshape((w_shape[0], batch_size*6*6)) #d_S = d_S.reshape((2,200, 64)) #x_value=1e3*np.random.ranf((1,15,10,10)) x_value = 1e3*np.random.ranf((2,15, 10, 10)) f = theano.function([x,y], [var_input, d_S, d_W], allow_input_downcast=True, on_unused_input='ignore') A, B, C= f(x_value, [5, 5]) print np.mean(B) return E_A = expansion_op(A, (2, 15, 10, 10), 
(5,5)) print E_A.shape E_A = E_A.reshape((2*36, C.shape[1])) print E_A.shape tmp = C - np.dot(B, E_A) print lin.norm(tmp, 'fro')
ConvolutionalLayer(Rectifier().apply, (3, 3), 32, (2, 2), name='l2') ] convnet = ConvolutionalSequence(conv_layers, num_channels=1, image_size=(28, 28), weights_init=IsotropicGaussian(0.1), biases_init=Constant(0)) convnet.initialize() output_dim = np.prod(convnet.get_dim('output')) print(output_dim) # Fully connected layers features = Flattener().apply(convnet.apply(x)) mlp = MLP(activations=[Rectifier(), None], dims=[output_dim, 100, 10], weights_init=IsotropicGaussian(0.01), biases_init=Constant(0)) mlp.initialize() y_hat = mlp.apply(features) # numerically stable softmax cost = Softmax().categorical_cross_entropy(y.flatten(), y_hat) cost.name = 'nll' error_rate = MisclassificationRate().apply(y.flatten(), y_hat) #cost = MisclassificationRate().apply(y, y_hat) #cost.name = 'error_rate'
def build_submodel(input_shape,
                   output_dim,
                   L_dim_conv_layers,
                   L_filter_size,
                   L_pool_size,
                   L_activation_conv,
                   L_dim_full_layers,
                   L_activation_full,
                   L_exo_dropout_conv_layers,
                   L_exo_dropout_full_layers,
                   L_endo_dropout_conv_layers,
                   L_endo_dropout_full_layers,
                   L_border_mode=None,
                   L_filter_step=None,
                   L_pool_step=None):
    """Build a conv + fully-connected classifier from per-layer spec lists.

    Each ``L_*`` argument is a per-layer list; they must all have matching
    lengths (asserted below). "Exo" dropout shrinks layer widths/filter
    counts up front; "endo" dropout is applied to layer inputs on the
    computation graph at the end via ``apply_dropout``.

    Returns ``(cg, error_rate, cost, D_params, D_kind)`` where ``cg`` is the
    (possibly dropout-transformed) ComputationGraph.
    """
    # TO DO : target size and name of the features

    x = T.tensor4('features')
    y = T.imatrix('targets')

    assert len(input_shape) == 3, "input_shape must be a 3d tensor"

    num_channels = input_shape[0]
    image_size = tuple(input_shape[1:])
    print image_size
    print num_channels
    prediction = output_dim

    # CONVOLUTION
    output_conv = x
    output_dim = num_channels*np.prod(image_size)
    conv_layers = []
    assert len(L_dim_conv_layers) == len(L_filter_size)
    if L_filter_step is None:
        L_filter_step = [None] * len(L_dim_conv_layers)
    assert len(L_dim_conv_layers) == len(L_pool_size)
    if L_pool_step is None:
        L_pool_step = [None] * len(L_dim_conv_layers)
    assert len(L_dim_conv_layers) == len(L_pool_step)
    assert len(L_dim_conv_layers) == len(L_activation_conv)
    if L_border_mode is None:
        L_border_mode = ["valid"] * len(L_dim_conv_layers)
    assert len(L_dim_conv_layers) == len(L_border_mode)
    assert len(L_dim_conv_layers) == len(L_endo_dropout_conv_layers)
    assert len(L_dim_conv_layers) == len(L_exo_dropout_conv_layers)

    # regarding the batch dropout : the dropout is applied on the filter
    # which is equivalent to the output dimension
    # you have to look at the dropout_rate of the next layer
    # that is why we need to have the first dropout value of L_exo_dropout_full_layers

    # the first value has to be 0.0 in this context, and we'll
    # assume that it is, but let's have an assert
    assert L_exo_dropout_conv_layers[0] == 0.0, "L_exo_dropout_conv_layers[0] has to be 0.0 in this context. There are ways to make it work, of course, but we don't support this with this scripts."

    # here modifitication of L_exo_dropout_conv_layers
    L_exo_dropout_conv_layers = L_exo_dropout_conv_layers[1:] + [L_exo_dropout_full_layers[0]]

    if len(L_dim_conv_layers):
        for (num_filters, filter_size, filter_step,
             pool_size, pool_step, activation_str, border_mode,
             dropout, index) in zip(L_dim_conv_layers,
                                    L_filter_size,
                                    L_filter_step,
                                    L_pool_size,
                                    L_pool_step,
                                    L_activation_conv,
                                    L_border_mode,
                                    L_exo_dropout_conv_layers,
                                    xrange(len(L_dim_conv_layers))
                                    ):

            # convert filter_size and pool_size in tuple
            filter_size = tuple(filter_size)

            if filter_step is None:
                filter_step = (1, 1)
            else:
                filter_step = tuple(filter_step)

            # (0, 0) is used as the "no pooling" sentinel below.
            if pool_size is None:
                pool_size = (0,0)
            else:
                pool_size = tuple(pool_size)

            # TO DO : leaky relu
            if activation_str.lower() == 'rectifier':
                activation = Rectifier().apply
            elif activation_str.lower() == 'tanh':
                activation = Tanh().apply
            elif activation_str.lower() in ['sigmoid', 'logistic']:
                activation = Logistic().apply
            elif activation_str.lower() in ['id', 'identity']:
                activation = Identity().apply
            else:
                raise Exception("unknown activation function : %s", activation_str)

            assert 0.0 <= dropout and dropout < 1.0
            # Exo dropout: shrink the filter count instead of masking units.
            num_filters = num_filters - int(num_filters*dropout)

            print "border_mode : %s" % border_mode

            # filter_step
            # http://blocks.readthedocs.org/en/latest/api/bricks.html#module-blocks.bricks.conv

            kwargs = {}
            if filter_step is None or filter_step == (1,1):
                pass
            else:
                # there's a bit of a mix of names because `Convolutional` takes
                # a "step" argument, but `ConvolutionActivation` takes "conv_step" argument
                kwargs['conv_step'] = filter_step

            if (pool_size[0] == 0 and pool_size[1] == 0):
                layer_conv = ConvolutionalActivation(activation=activation,
                                                     filter_size=filter_size,
                                                     num_filters=num_filters,
                                                     border_mode=border_mode,
                                                     name="layer_%d" % index,
                                                     **kwargs)
            else:
                if pool_step is None:
                    pass
                else:
                    kwargs['pooling_step'] = tuple(pool_step)

                layer_conv = ConvolutionalLayer(activation=activation,
                                                filter_size=filter_size,
                                                num_filters=num_filters,
                                                border_mode=border_mode,
                                                pooling_size=pool_size,
                                                name="layer_%d" % index,
                                                **kwargs)

            conv_layers.append(layer_conv)

        convnet = ConvolutionalSequence(conv_layers, num_channels=num_channels,
                                        image_size=image_size,
                                        weights_init=Uniform(width=0.1),
                                        biases_init=Constant(0.0),
                                        name="conv_section")
        convnet.push_allocation_config()
        convnet.initialize()
        output_dim = np.prod(convnet.get_dim('output'))
        output_conv = convnet.apply(output_conv)
        output_conv = Flattener().apply(output_conv)

    # FULLY CONNECTED
    output_mlp = output_conv
    full_layers = []
    assert len(L_dim_full_layers) == len(L_activation_full)
    assert len(L_dim_full_layers) + 1 == len(L_endo_dropout_full_layers)
    assert len(L_dim_full_layers) + 1 == len(L_exo_dropout_full_layers)

    # reguarding the batch dropout : the dropout is applied on the filter
    # which is equivalent to the output dimension
    # you have to look at the dropout_rate of the next layer
    # that is why we throw away the first value of L_exo_dropout_full_layers
    L_exo_dropout_full_layers = L_exo_dropout_full_layers[1:]

    pre_dim = output_dim
    print "When constructing the model, the output_dim of the conv section is %d." % output_dim
    if len(L_dim_full_layers):
        for (dim, activation_str,
             dropout, index) in zip(L_dim_full_layers,
                                    L_activation_full,
                                    L_exo_dropout_full_layers,
                                    range(len(L_dim_conv_layers),
                                          len(L_dim_conv_layers)+
                                          len(L_dim_full_layers))
                                    ):
            # TO DO : leaky relu
            if activation_str.lower() == 'rectifier':
                activation = Rectifier().apply
            elif activation_str.lower() == 'tanh':
                activation = Tanh().apply
            elif activation_str.lower() in ['sigmoid', 'logistic']:
                activation = Logistic().apply
            elif activation_str.lower() in ['id', 'identity']:
                activation = Identity().apply
            else:
                raise Exception("unknown activation function : %s", activation_str)

            assert 0.0 <= dropout and dropout < 1.0
            # Exo dropout: shrink the hidden width instead of masking units.
            dim = dim - int(dim*dropout)
            print "When constructing the fully-connected section, we apply dropout %f to add an MLP going from pre_dim %d to dim %d." % (dropout, pre_dim, dim)

            layer_full = MLP(activations=[activation], dims=[pre_dim, dim],
                             weights_init=Uniform(width=0.1),
                             biases_init=Constant(0.0),
                             name="layer_%d" % index)
            layer_full.initialize()
            full_layers.append(layer_full)

            pre_dim = dim

        # Chain all fully-connected layers onto the conv features.
        for layer in full_layers:
            output_mlp = layer.apply(output_mlp)

        output_dim = L_dim_full_layers[-1] - int(L_dim_full_layers[-1]*L_exo_dropout_full_layers[-1])

    # COST FUNCTION
    output_layer = Linear(output_dim, prediction,
                          weights_init=Uniform(width=0.1),
                          biases_init=Constant(0.0),
                          name="layer_"+str(len(L_dim_conv_layers)+
                                            len(L_dim_full_layers))
                          )
    output_layer.initialize()
    full_layers.append(output_layer)
    y_pred = output_layer.apply(output_mlp)
    y_hat = Softmax().apply(y_pred)

    # SOFTMAX and log likelihood
    y_pred = Softmax().apply(y_pred)
    # be careful. one version expects the output of a softmax; the other expects just the
    # output of the network
    cost = CategoricalCrossEntropy().apply(y.flatten(), y_pred)
    #cost = Softmax().categorical_cross_entropy(y.flatten(), y_pred)
    cost.name = "cost"

    # Misclassification
    error_rate_brick = MisclassificationRate()
    error_rate = error_rate_brick.apply(y.flatten(), y_hat)
    error_rate.name = "error_rate"

    # put names
    D_params, D_kind = build_params(x, T.matrix(), conv_layers, full_layers)
    # test computation graph
    cg = ComputationGraph(cost)

    # DROPOUT
    L_endo_dropout = L_endo_dropout_conv_layers + L_endo_dropout_full_layers

    cg_dropout = cg
    inputs = VariableFilter(roles=[INPUT])(cg.variables)

    # NOTE(review): each apply_dropout call below transforms the *original*
    # ``cg``, not the accumulated ``cg_dropout`` — so only the dropout of the
    # last matching layer survives in the returned graph. Confirm whether
    # ``apply_dropout(cg_dropout, ...)`` was intended.
    for (index, drop_rate) in enumerate(L_endo_dropout):
        for input_ in inputs:
            m = re.match(r"layer_(\d+)_apply.*", input_.name)
            if m and index == int(m.group(1)):
                if drop_rate < 0.0001:
                    print "Skipped applying dropout on %s because the dropout rate was under 0.0001." % input_.name
                    break
                else:
                    cg_dropout = apply_dropout(cg, [input_], drop_rate)
                    print "Applied dropout %f on %s." % (drop_rate, input_.name)
                    break

    cg = cg_dropout

    return (cg, error_rate, cost, D_params, D_kind)
def build_and_run(label, config):
    """Build a convnet + MLP classifier from ``config`` and train it.

    ``config`` supplies architecture and schedule parameters (filter sizes,
    pooling sizes, MLP widths, batch size, activations, epoch/batch limits).
    The cost is cross-entropy plus fixed L1/L2 penalties on all weights and
    biases; training runs with Adam under a blocks MainLoop with monitoring,
    checkpointing of the best validation error, live Bokeh plotting and
    early stopping. ``label`` names the checkpoint file and the plot.
    """
    ############## CREATE THE NETWORK ###############
    #Define the parameters
    num_epochs, num_batches, num_channels, image_shape, filter_size, num_filter, pooling_sizes, mlp_hiddens, output_size, batch_size, activation, mlp_activation = config[
        'num_epochs'], config['num_batches'], config['num_channels'], config[
            'image_shape'], config['filter_size'], config[
                'num_filter'], config['pooling_sizes'], config[
                    'mlp_hiddens'], config['output_size'], config[
                        'batch_size'], config['activation'], config[
                            'mlp_activation']
    #    print(num_epochs, num_channels, image_shape, filter_size, num_filter, pooling_sizes, mlp_hiddens, output_size, batch_size, activation, mlp_activation)
    lambda_l1 = 0.000025
    lambda_l2 = 0.000025

    print("Building model")
    #Create the symbolics variable
    x = T.tensor4('image_features')
    y = T.lmatrix('targets')

    #Get the parameters
    conv_parameters = zip(filter_size, num_filter)

    #Create the convolutions layers
    # Interleave conv / activation / pooling bricks per layer.
    conv_layers = list(
        interleave([(Convolutional(filter_size=filter_size,
                                   num_filters=num_filter,
                                   name='conv_{}'.format(i))
                     for i, (filter_size, num_filter) in enumerate(conv_parameters)),
                    (activation),
                    (MaxPooling(size, name='pool_{}'.format(i))
                     for i, size in enumerate(pooling_sizes))]))
    #    (AveragePooling(size, name='pool_{}'.format(i)) for i, size in enumerate(pooling_sizes))]))

    #Create the sequence
    conv_sequence = ConvolutionalSequence(conv_layers,
                                          num_channels,
                                          image_size=image_shape,
                                          weights_init=Uniform(width=0.2),
                                          biases_init=Constant(0.))
    #Initialize the convnet
    conv_sequence.initialize()
    #Add the MLP
    top_mlp_dims = [np.prod(conv_sequence.get_dim('output'))
                    ] + mlp_hiddens + [output_size]
    out = Flattener().apply(conv_sequence.apply(x))
    mlp = MLP(mlp_activation,
              top_mlp_dims,
              weights_init=Uniform(0, 0.2),
              biases_init=Constant(0.))
    #Initialisze the MLP
    mlp.initialize()
    #Get the output
    predict = mlp.apply(out)

    cost = CategoricalCrossEntropy().apply(y.flatten(),
                                           predict).copy(name='cost')
    error = MisclassificationRate().apply(y.flatten(), predict)
    #Little trick to plot the error rate in two different plots (We can't use two time the same data in the plot for a unknow reason)
    error_rate = error.copy(name='error_rate')
    error_rate2 = error.copy(name='error_rate2')

    ########### REGULARIZATION ##################
    cg = ComputationGraph([cost])
    weights = VariableFilter(roles=[WEIGHT])(cg.variables)
    biases = VariableFilter(roles=[BIAS])(cg.variables)
    # # l2_penalty_weights = T.sum([i*lambda_l2/len(weights) * (W ** 2).sum() for i,W in enumerate(weights)]) # Gradually increase penalty for layer
    l2_penalty = T.sum([
        lambda_l2 * (W**2).sum() for i, W in enumerate(weights + biases)
    ])  # Gradually increase penalty for layer
    # # #l2_penalty_bias = T.sum([lambda_l2*(B **2).sum() for B in biases])
    # # #l2_penalty = l2_penalty_weights + l2_penalty_bias
    l2_penalty.name = 'l2_penalty'
    l1_penalty = T.sum([lambda_l1 * T.abs_(z).sum() for z in weights + biases])
    #  l1_penalty_weights = T.sum([i*lambda_l1/len(weights) * T.abs_(W).sum() for i,W in enumerate(weights)]) # Gradually increase penalty for layer
    #  l1_penalty_biases = T.sum([lambda_l1 * T.abs_(B).sum() for B in biases])
    #  l1_penalty = l1_penalty_biases + l1_penalty_weights
    l1_penalty.name = 'l1_penalty'
    costreg = cost + l2_penalty + l1_penalty
    costreg.name = 'costreg'

    ########### DEFINE THE ALGORITHM #############
    #    algorithm = GradientDescent(cost=cost, parameters=cg.parameters, step_rule=Momentum())
    algorithm = GradientDescent(cost=costreg,
                                parameters=cg.parameters,
                                step_rule=Adam())

    ########### GET THE DATA #####################
    istest = 'test' in config.keys()
    train_stream, valid_stream, test_stream = get_stream(batch_size,
                                                         image_shape,
                                                         test=istest)

    ########### INITIALIZING EXTENSIONS ##########
    checkpoint = Checkpoint('models/best_' + label + '.tar')
    # Only save when a new best validation error is recorded.
    checkpoint.add_condition(
        ['after_epoch'],
        predicate=OnLogRecord('valid_error_rate_best_so_far'))
    #Adding a live plot with the bokeh server
    plot = Plot(
        label,
        channels=[
            ['train_error_rate', 'valid_error_rate'],
            ['valid_cost', 'valid_error_rate2'],
            #        ['train_costreg','train_grad_norm']],
            [
                'train_costreg', 'train_total_gradient_norm',
                'train_l2_penalty', 'train_l1_penalty'
            ]
        ],
        server_url="http://hades.calculquebec.ca:5042")

    grad_norm = aggregation.mean(algorithm.total_gradient_norm)
    grad_norm.name = 'grad_norm'

    extensions = [
        Timing(),
        FinishAfter(after_n_epochs=num_epochs, after_n_batches=num_batches),
        DataStreamMonitoring([cost, error_rate, error_rate2],
                             valid_stream,
                             prefix="valid"),
        TrainingDataMonitoring([
            costreg, error_rate, error_rate2, grad_norm, l2_penalty, l1_penalty
        ],
                               prefix="train",
                               after_epoch=True),
        plot,
        ProgressBar(),
        Printing(),
        TrackTheBest('valid_error_rate', min),
        #Keep best
        checkpoint,
        #Save best
        FinishIfNoImprovementAfter('valid_error_rate_best_so_far', epochs=4)
    ]  # Early-stopping
    model = Model(cost)
    main_loop = MainLoop(algorithm,
                         data_stream=train_stream,
                         model=model,
                         extensions=extensions)
    main_loop.run()
def __init__(self, rnn_dims, num_actions, data_X_np=None, data_y_np=None,
             width=32, height=32):
    """Build the conv -> RNN -> (expected-reward, action-softmax) graph.

    Parameters
    ----------
    rnn_dims : int
        Dimensionality of the stored RNN state array.
        NOTE(review): the network itself is built with the hard-coded
        RNN_DIMS = 100, so rnn_dims must equal 100 for the state shapes
        to agree -- confirm.
    num_actions : int
        Size of the softmax action output.
    data_X_np, data_y_np : numpy arrays, optional
        Training features/targets; random placeholders are generated when
        either is missing.
    width, height : int
        Spatial size of the input images fed to the convnet.
    """
    ###############################################################
    #
    #        Network and data setup
    #
    ##############################################################
    RNN_DIMS = 100
    NUM_ACTIONS = num_actions
    # NOTE(review): tensor5 is created but never used below.
    tensor5 = T.TensorType('float32', [False, True, True, True, True])
    self.x = T.tensor4('features')
    self.reward = T.tensor3('targets', dtype='float32')
    self.state = T.matrix('states', dtype='float32')
    self.hidden_states = []  # holds hidden states in np array form

    # data_X & data_Y supplied in init function now...
    if data_X_np is None or data_y_np is None:
        print 'you did not supply data at init'
        # Random placeholder data so the streams can still be constructed.
        data_X_np = np.float32(np.random.normal(size=(1280, 1, 1, width, height)))
        data_y_np = np.float32(np.random.normal(size=(1280, 1, 1, 1)))

    # data_states_np = np.float32(np.ones((1280, 1, 100)))
    # One zero-initialized RNN state vector per example.
    state_shape = (data_X_np.shape[0], rnn_dims)
    self.data_states_np = np.float32(np.zeros(state_shape))
    self.datastream = IterableDataset(dict(
        features=data_X_np,
        targets=data_y_np,
        states=self.data_states_np)).get_example_stream()
    self.datastream_test = IterableDataset(dict(
        features=data_X_np,
        targets=data_y_np,
        states=self.data_states_np)).get_example_stream()
    data_X = self.datastream

    # 2 conv inputs
    # we want to take our sequence of input images and convert them to
    # convolutional representations
    conv_layers = [ConvolutionalLayer(Rectifier().apply, (3, 3), 16, (2, 2), name='l1'),
                   ConvolutionalLayer(Rectifier().apply, (3, 3), 32, (2, 2), name='l2'),
                   ConvolutionalLayer(Rectifier().apply, (3, 3), 64, (2, 2), name='l3'),
                   ConvolutionalLayer(Rectifier().apply, (3, 3), 128, (2, 2), name='l4'),
                   ConvolutionalLayer(Rectifier().apply, (3, 3), 128, (2, 2), name='l5'),
                   ConvolutionalLayer(Rectifier().apply, (3, 3), 128, (2, 2), name='l6')]
    convnet = ConvolutionalSequence(conv_layers, num_channels=4,
                                    image_size=(width, height),
                                    weights_init=init.Uniform(0, 0.01),
                                    biases_init=init.Constant(0.0),
                                    tied_biases=False,
                                    border_mode='full')
    convnet.initialize()
    output_dim = np.prod(convnet.get_dim('output'))
    conv_out = convnet.apply(self.x)
    # Flatten (batch, channels, rows, cols) -> (batch, features).
    reshape_dims = (conv_out.shape[0],
                    conv_out.shape[1] * conv_out.shape[2] * conv_out.shape[3])
    hidden_repr = conv_out.reshape(reshape_dims)

    # Project the flattened conv features down to the RNN input size.
    conv2rnn = Linear(input_dim=output_dim, output_dim=RNN_DIMS,
                      weights_init=init.Uniform(width=0.01),
                      biases_init=init.Constant(0.))
    conv2rnn.initialize()
    conv2rnn_output = conv2rnn.apply(hidden_repr)

    # RNN hidden layer
    # then we want to feed those conv representations into an RNN
    rnn = SimpleRecurrent(dim=RNN_DIMS, activation=Rectifier(),
                          weights_init=init.Uniform(width=0.01))
    rnn.initialize()
    # iterate=False: a single step, with the previous state fed in explicitly.
    self.learned_state = rnn.apply(inputs=conv2rnn_output,
                                   states=self.state, iterate=False)

    # linear output from hidden layer
    # the RNN has two outputs, but only this one has a target. That is, this
    # is "expected return" which the network attempts to minimize difference
    # between expected return and actual return
    lin_output = Linear(input_dim=RNN_DIMS, output_dim=1,
                        weights_init=init.Uniform(width=0.01),
                        biases_init=init.Constant(0.))
    lin_output.initialize()
    self.exp_reward = lin_output.apply(self.learned_state)
    self.get_exp_reward = theano.function([self.x, self.state],
                                          self.exp_reward)

    # softmax output from hidden layer
    # this provides a softmax of action recommendations
    # the hypothesis is that adjusting the other outputs magically influences
    # this set of outputs to suggest smarter (or more realistic?) moves
    action_output = Linear(input_dim=RNN_DIMS, output_dim=NUM_ACTIONS,
                           weights_init=init.Constant(.001),
                           biases_init=init.Constant(0.))
    action_output.initialize()
    self.suggested_actions = Softmax().apply(
        action_output.apply(self.learned_state[-1]))
    ######################
    # use this to get suggested actions... it requires the state of the
    # hidden units from the previous timestep
    #####################
    self.get_suggested_actions = theano.function(
        [self.x, self.state],
        [self.suggested_actions, self.learned_state])
b.ConvolutionalLayer(activation, filter_size, num_filters_, pooling_size, num_channels=3) for filter_size, num_filters_, pooling_size in zip(filter_sizes, num_filters, pooling_sizes) ] convnet = ConvolutionalSequence(conv_layers, num_channels=3, image_size=(32, 32), weights_init=Uniform(0, 0.2), biases_init=Constant(0.)) convnet.initialize() conv_features = Flattener().apply(convnet.apply(X)) # MLP mlp = MLP(activations=[Logistic(name='sigmoid_0'), Softmax(name='softmax_1')], dims=[256, 256, 256, 2], weights_init=IsotropicGaussian(0.01), biases_init=Constant(0)) [child.name for child in mlp.children] ['linear_0', 'sigmoid_0', 'linear_1', 'softmax_1'] Y = mlp.apply(conv_features) mlp.initialize() # Setting up the cost function from blocks.bricks.cost import CategoricalCrossEntropy
conv_layers = list(interleave([(ConvolutionalActivation( filter_size=filter_size, num_filters=num_filter, activation=activation, name='conv_{}'.format(i)) for i, (activation, filter_size, num_filter) in enumerate(conv_parameters)), (MaxPooling(size, name='pool_{}'.format(i)) for i, size in enumerate(pooling_sizes))])) #Create the sequence conv_sequence = ConvolutionalSequence(conv_layers, num_channels, image_size=image_shape, weights_init=Uniform(width=0.2), biases_init=Constant(0.)) #Initialize the convnet conv_sequence.initialize() #Add the MLP top_mlp_dims = [np.prod(conv_sequence.get_dim('output'))] + mlp_hiddens + [output_size] out = Flattener().apply(conv_sequence.apply(x)) mlp = MLP(mlp_activation, top_mlp_dims, weights_init=Uniform(0, 0.2), biases_init=Constant(0.)) #Initialisze the MLP mlp.initialize() #Get the output predict = mlp.apply(out) cost = CategoricalCrossEntropy().apply(y.flatten(), predict).copy(name='cost') error = MisclassificationRate().apply(y.flatten(), predict) #Little trick to plot the error rate in two different plots (We can't use two time the same data in the plot for a unknow reason) error_rate = error.copy(name='error_rate') error_rate2 = error.copy(name='error_rate2') cg = ComputationGraph([cost, error_rate]) ########### GET THE DATA #####################
num_filters=num_filter[j + 1], step=conv_step, border_mode=border_mode, name='conv_{}'.format(i))) conv_layers1.append(BatchNormalization(name='BNconv_{}'.format(i))) conv_layers1.append(conv_activation[0]) conv_layers1.append(MaxPooling(pooling_size[j + 1], name='pool_{}'.format(i))) conv_sequence = ConvolutionalSequence(conv_layers1, num_channels=num_channels, image_size=image_size, weights_init=Uniform(width=0.2), biases_init=Constant(0.), name='ConvSeq_{}'.format(i)) conv_sequence.initialize() out = conv_sequence.apply(x) #out = Flattener().apply(conv_sequence.apply(x)) ################# Convolutional Sequence 2A and 2B ################# # conv_layers2 parameters i = i + 1 #Sequence j = 0 #Sub Layer # 1st and 2nd are sequential; 3rd and 4th are sequential; 2 sequences are concatenated filter_size = [(1, 1), (5, 5), (1, 1), (3, 3)] num_filter = [32, 64, 64, 96] num_channels = 3 pooling_size = None conv_step = (1, 1) border_mode = 'half' out = intranet(i, j, out, image_size, filter_size, num_filter, num_channels,
i = i + 1 #Sequence conv_layers1.append( Convolutional( filter_size=filter_size[j+3], num_filters=num_filter[j+3], step=conv_step, border_mode=border_mode, name='conv_{}'.format(i))) conv_layers1.append(BatchNormalization(name='BNconv_{}'.format(i))) conv_layers1.append(conv_activation[0]) conv_layers1.append(MaxPooling(pooling_size[j+2], name='pool_{}'.format(i))) conv_sequence1 = ConvolutionalSequence(conv_layers1, num_channels=num_channels, image_size=image_size, weights_init=Uniform(width=0.2), biases_init=Constant(0.), name='ConvSeq1_{}'.format(i)) conv_sequence1.initialize() out1 = conv_sequence1.apply(x) ################# Convolutional Sequence 2 ################# # conv_layers2 parameters i = i+1 #Sequence j = 0 #Sub Layer filter_size = [(7,7), (5,5), (2,2), (5,5)] num_filter = [16, 32, 48, 64] num_channels = 3 pooling_size = [(3,3), (2,2), (2,2)] conv_step = (1,1) border_mode = 'valid' conv_layers2 = [] conv_layers2.append(SpatialBatchNormalization(name='spatialBN_{}'.format(i)))
def run_experiment(): np.random.seed(42) #X = tensor.matrix('features') X = tensor.tensor4('features') y = tensor.matrix('targets') nbr_channels = 3 image_shape = (30, 30) conv_layers = [ ConvolutionalLayer(filter_size=(4, 4), num_filters=10, activation=Rectifier().apply, border_mode='full', pooling_size=(1, 1), weights_init=Uniform(width=0.1), biases_init=Constant(0.0), name='conv0'), ConvolutionalLayer(filter_size=(3, 3), num_filters=14, activation=Rectifier().apply, border_mode='full', pooling_size=(1, 1), weights_init=Uniform(width=0.1), biases_init=Constant(0.0), name='conv1') ] conv_sequence = ConvolutionalSequence(conv_layers, num_channels=nbr_channels, image_size=image_shape) #conv_sequence.push_allocation_config() conv_sequence.initialize() conv_output_dim = np.prod(conv_sequence.get_dim('output')) #conv_output_dim = 25*25 flattener = Flattener() mlp = MLP(activations=[Rectifier(), Rectifier(), Softmax()], dims=[conv_output_dim, 50, 50, 10], weights_init=IsotropicGaussian(std=0.1), biases_init=IsotropicGaussian(std=0.01)) mlp.initialize() conv_output = conv_sequence.apply(X) y_hat = mlp.apply(flattener.apply(conv_output)) cost = CategoricalCrossEntropy().apply(y, y_hat) #cost = CategoricalCrossEntropy().apply(y_hat, y) #cost = BinaryCrossEntropy().apply(y.flatten(), y_hat.flatten()) cg = ComputationGraph([y_hat]) """ print "--- INPUT ---" for v in VariableFilter(bricks=mlp.linear_transformations, roles=[INPUT])(cg.variables): print v.tag.annotations[0].name print "--- OUTPUT ---" #print(VariableFilter(bricks=mlp.linear_transformations, roles=[OUTPUT])(cg.variables)) for v in VariableFilter(bricks=mlp.linear_transformations, roles=[OUTPUT])(cg.variables): print v.tag.annotations[0].name print "--- WEIGHT ---" #print(VariableFilter(bricks=mlp.linear_transformations, roles=[WEIGHT])(cg.variables)) for v in VariableFilter(bricks=mlp.linear_transformations, roles=[WEIGHT])(cg.variables): print v.tag.annotations[0].name print "--- BIAS ---" 
#print(VariableFilter(bricks=mlp.linear_transformations, roles=[BIAS])(cg.variables)) for v in VariableFilter(bricks=mlp.linear_transformations, roles=[BIAS])(cg.variables): print v.tag.annotations[0].name """ # check out .tag on the variables to see which layer they belong to print "----------------------------" D_by_layer = get_linear_transformation_roles(mlp, cg) # returns a vector with one entry for each in the mini-batch individual_sum_square_norm_gradients_method_00 = get_sum_square_norm_gradients_linear_transformations( D_by_layer, cost) #import pprint #pp = pprint.PrettyPrinter(indent=4) #pp.pprint(get_conv_layers_transformation_roles(ComputationGraph(conv_output)).items()) D_by_layer = get_conv_layers_transformation_roles( ComputationGraph(conv_output)) individual_sum_square_norm_gradients_method_00 += get_sum_square_norm_gradients_conv_transformations( D_by_layer, cost) print "There are %d entries in cg.parameters." % len(cg.parameters) L_grads_method_01 = [tensor.grad(cost, p) for p in cg.parameters] L_grads_method_02 = [ tensor.grad(cost, v) for v in VariableFilter(roles=[WEIGHT, BIAS])(cg.variables) ] # works on the sum of the gradients in a mini-batch sum_square_norm_gradients_method_01 = sum( [tensor.sqr(g).sum() for g in L_grads_method_01]) sum_square_norm_gradients_method_02 = sum( [tensor.sqr(g).sum() for g in L_grads_method_02]) N = 8 Xtrain = np.random.randn(N, nbr_channels, image_shape[0], image_shape[1]).astype(np.float32) # Option 1. ytrain = np.zeros((N, 10), dtype=np.float32) for n in range(N): label = np.random.randint(low=0, high=10) ytrain[n, label] = 1.0 # Option 2, just to debug situations with NaN. 
#ytrain = np.random.rand(N, 10).astype(np.float32) #for n in range(N): # ytrain[n,:] = ytrain[n,:] / ytrain[n,:].sum() f = theano.function([X, y], [ cost, individual_sum_square_norm_gradients_method_00, sum_square_norm_gradients_method_01, sum_square_norm_gradients_method_02 ]) [c, v0, gs1, gs2] = f(Xtrain, ytrain) #print "[c, v0, gs1, gs2]" L_c, L_v0, L_gs1, L_gs2 = ([], [], [], []) for n in range(N): [nc, nv0, ngs1, ngs2] = f( Xtrain[n, :].reshape( (1, Xtrain.shape[1], Xtrain.shape[2], Xtrain.shape[3])), ytrain[n, :].reshape((1, 10))) L_c.append(nc) L_v0.append(nv0) L_gs1.append(ngs1) L_gs2.append(ngs2) print "Cost for whole mini-batch in single shot : %f." % c print "Cost for whole mini-batch accumulated : %f." % sum(L_c) print "" print "Square-norm of all gradients for each data point in single shot :" print v0.reshape((1, -1)) print "Square-norm of all gradients for each data point iteratively :" print np.array(L_gs1).reshape((1, -1)) print "Square-norm of all gradients for each data point iteratively :" print np.array(L_gs2).reshape((1, -1)) print "" print "Difference max abs : %f." % np.max(np.abs(v0 - np.array(L_gs1))) print "Difference max abs : %f." % np.max(np.abs(v0 - np.array(L_gs2))) print "" print "Ratios : " print np.array(L_gs1).reshape((1, -1)) / v0.reshape((1, -1))
name='conv_2')) layers.append(BatchNormalization(name='batch_2')) layers.append(Rectifier()) layers.append(MaxPooling((3, 3), step=(2, 2), padding=(1, 1), name='pool_2')) #Create the sequence conv_sequence = ConvolutionalSequence(layers, num_channels=3, image_size=(160, 160), weights_init=Orthogonal(), use_bias=False, name='convSeq') #Initialize the convnet conv_sequence.initialize() #Output the first result out = conv_sequence.apply(x) ###############SECOND STAGE##################### out2 = inception((20, 20), 192, 64, 96, 128, 16, 32, 32, out, 10) out3 = inception((20, 20), 256, 128, 128, 192, 32, 96, 64, out2, 20) out31 = MaxPooling((2, 2), name='poolLow').apply(out3) out4 = inception((10, 10), 480, 192, 96, 208, 16, 48, 64, out31, 30) out5 = inception((10, 10), 512, 160, 112, 224, 24, 64, 64, out4, 40) out6 = inception((10, 10), 512, 128, 128, 256, 24, 64, 64, out5, 50) out7 = inception((10, 10), 512, 112, 144, 288, 32, 64, 64, out6, 60) out8 = inception((10, 10), 528, 256, 160, 320, 32, 128, 128, out7, 70) out81 = MaxPooling((2, 2), name='poolLow1').apply(out8) out9 = inception((5, 5), 832, 256, 160, 320, 32, 128, 128, out81, 80) out10 = inception((5, 5), 832, 384, 192, 384, 48, 128, 128, out9, 90)
layers.append(Rectifier()) layers.append(MaxPooling((3, 3), step=(2, 2), padding=(1, 1), name="pool_0")) layers.append(Convolutional(filter_size=(1, 1), num_filters=64, border_mode="half", name="conv_1")) layers.append(Rectifier()) layers.append(Convolutional(filter_size=(3, 3), num_filters=192, border_mode="half", name="conv_2")) layers.append(Rectifier()) layers.append(MaxPooling((3, 3), step=(2, 2), padding=(1, 1), name="pool_2")) # Create the sequence conv_sequence = ConvolutionalSequence( layers, num_channels=3, image_size=(None, None), weights_init=Orthogonal(), use_bias=False, name="convSeq" ) # Initialize the convnet # Output the first result out = conv_sequence.apply(x) ###############SECOND STAGE##################### out2 = inception((None, None), 192, 64, 96, 128, 16, 32, 32, out, 10) out3 = inception((None, None), 256, 128, 128, 192, 32, 96, 64, out2, 20) out31 = MaxPooling((2, 2), name="poolLow").apply(out3) out4 = inception((None, None), 480, 192, 96, 208, 16, 48, 64, out31, 30) out5 = inception((None, None), 512, 160, 112, 224, 24, 64, 64, out4, 40) out6 = inception((None, None), 512, 128, 128, 256, 24, 64, 64, out5, 50) out7 = inception((None, None), 512, 112, 144, 288, 32, 64, 64, out6, 60) out8 = inception((None, None), 528, 256, 160, 320, 32, 128, 128, out7, 70) out81 = MaxPooling((20, 20), name="poolLow1").apply(out8) out9 = inception((None, None), 832, 256, 160, 320, 32, 128, 128, out81, 80) out10 = inception((None, None), 832, 384, 192, 384, 48, 128, 128, out9, 90) out91 = AveragePooling((5, 5), name="poolLow2").apply(out10)
def intranet(i, j, out, image_size, filter_size, num_filter, num_channels, pooling_size, conv_step, border_mode, conv_activation): conv_layersA = [] #first intra convolutional sequence conv_layersA.append( Convolutional(filter_size=filter_size[j], num_filters=num_filter[j], step=conv_step, border_mode=border_mode, name='conv_A{}({})'.format(i, j))) conv_layersA.append(BatchNormalization(name='BNconv_A{}({})'.format(i, j))) conv_layersA.append(conv_activation[0]) j = j + 1 #next sub layer conv_layersA.append( Convolutional(filter_size=filter_size[j], num_filters=num_filter[j], step=conv_step, border_mode=border_mode, name='conv_A{}({})'.format(i, j))) conv_layersA.append(BatchNormalization(name='BNconv_A{}({})'.format(i, j))) conv_layersA.append(conv_activation[0]) conv_sequenceA = ConvolutionalSequence(conv_layersA, num_channels=num_channels, image_size=image_size, weights_init=Uniform(width=0.2), use_bias=False, name='convSeq_A{}'.format(i)) out1 = conv_sequenceA.apply(out) conv_layersB = [] #second intra convolutional sequence j = j + 1 #next sub layer conv_layersB.append( Convolutional(filter_size=filter_size[j], num_filters=num_filter[j], step=conv_step, border_mode=border_mode, name='conv_B{}({})'.format(i, j))) conv_layersB.append(BatchNormalization(name='BNconv_B{}({})'.format(i, j))) conv_layersB.append(conv_activation[0]) j = j + 1 #next sub layer conv_layersB.append( Convolutional(filter_size=filter_size[j], num_filters=num_filter[j], step=conv_step, border_mode=border_mode, name='conv_B{}({})'.format(i, j))) conv_layersB.append(BatchNormalization(name='BNconv_B{}({})'.format(i, j))) conv_layersB.append(conv_activation[0]) conv_sequenceB = ConvolutionalSequence(conv_layersB, num_channels=num_channels, image_size=image_size, weights_init=Uniform(width=0.2), use_bias=False, name='convSeq_B{}'.format(i)) out2 = conv_sequenceB.apply(out) #Merge return tensor.concatenate([out1, out2], axis=1)
def create_network(inputs=None, batch=batch_size):
    """Build the Gated PixelCNN graph and return (cost, cost_bits_dim).

    Uses the module-level configuration globals (dataset, img_dim, h,
    n_channel, n_layer, third_layer, MODE).  Note that `batch=batch_size`
    binds the global's value at definition time.

    Parameters
    ----------
    inputs : theano tensor4, optional
        Input images; a fresh 'features' tensor4 is created when omitted.
    batch : int
        Batch size passed to the bricks.

    Returns
    -------
    (cost, cost_bits_dim)
        Negative log-likelihood in nats, and the per-dimension cost in bits.
    """
    if inputs is None:
        inputs = T.tensor4('features')
    x = T.cast(inputs,'float32')
    # Scale to [0, 1] unless the data is already binary.
    x = x / 255. if dataset != 'binarized_mnist' else x

    # GatedPixelCNN: first layer splits into vertical/horizontal stacks.
    gated = GatedPixelCNN(
        name='gated_layer_0',
        filter_size=7,
        image_size=(img_dim,img_dim),
        num_filters=h*n_channel,
        num_channels=n_channel,
        batch_size=batch,
        weights_init=IsotropicGaussian(std=0.02, mean=0),
        biases_init=Constant(0.02),
        res=False
    )
    gated.initialize()
    x_v, x_h = gated.apply(x, x)

    # Residual gated layers on top of the two stacks.
    for i in range(n_layer):
        gated = GatedPixelCNN(
            name='gated_layer_{}'.format(i+1),
            filter_size=3,
            image_size=(img_dim,img_dim),
            num_channels=h*n_channel,
            batch_size=batch,
            weights_init=IsotropicGaussian(std=0.02, mean=0),
            biases_init=Constant(0.02),
            res=True
        )
        gated.initialize()
        x_v, x_h = gated.apply(x_v, x_h)

    # Output head: two masked ('B') 1x1-style conv layers with ReLUs.
    conv_list = []
    conv_list.extend([Rectifier(), ConvolutionalNoFlip((1,1), h*n_channel, mask_type='B', name='1x1_conv_1')])
    #conv_list.extend([Rectifier(), ConvolutionalNoFlip((1,1), h*n_channel, mask='B', name='1x1_conv_2')])
    conv_list.extend([Rectifier(), ConvolutionalNoFlip(*third_layer, mask_type='B', name='output_layer')])

    sequence = ConvolutionalSequence(
        conv_list,
        num_channels=h*n_channel,
        batch_size=batch,
        image_size=(img_dim,img_dim),
        border_mode='half',
        weights_init=IsotropicGaussian(std=0.02, mean=0),
        biases_init=Constant(0.02),
        tied_biases=False
    )
    sequence.initialize()
    # Only the horizontal stack feeds the output head.
    x = sequence.apply(x_h)

    if MODE == '256ary':
        # 256-way softmax per pixel and channel.
        x = x.reshape((-1, 256, n_channel, img_dim, img_dim)).dimshuffle(0,2,3,4,1)
        x = x.reshape((-1, 256))
        x_hat = Softmax().apply(x)
        inp = T.cast(inputs, 'int64').flatten()
        cost = CategoricalCrossEntropy().apply(inp, x_hat) * img_dim * img_dim
        cost_bits_dim = categorical_crossentropy(log_softmax(x), inp)
    else:
        # Bernoulli output for binarized data.
        x_hat = Logistic().apply(x)
        cost = BinaryCrossEntropy().apply(inputs, x_hat) * img_dim * img_dim
        #cost = T.nnet.binary_crossentropy(x_hat, inputs)
        #cost = cost.sum() / inputs.shape[0]
        cost_bits_dim = -(inputs * T.log2(x_hat) + (1.0 -
                          x_hat)).mean()

    cost_bits_dim.name = "nnl_bits_dim"
    cost.name = 'loglikelihood_nat'

    return cost, cost_bits_dim
num_filter) in enumerate(conv_parameters)), (MaxPooling(size, name='pool_{}'.format(i)) for i, size in enumerate(pooling_sizes))])) #Create the sequence conv_sequence = ConvolutionalSequence(conv_layers, num_channels, image_size=image_shape, weights_init=Uniform(width=0.2), biases_init=Constant(0.)) #Initialize the convnet conv_sequence.initialize() #Add the MLP top_mlp_dims = [np.prod(conv_sequence.get_dim('output')) ] + mlp_hiddens + [output_size] out = Flattener().apply(conv_sequence.apply(x)) mlp = MLP(mlp_activation, top_mlp_dims, weights_init=Uniform(0, 0.2), biases_init=Constant(0.)) #Initialisze the MLP mlp.initialize() #Get the output predict = mlp.apply(out) cost = CategoricalCrossEntropy().apply(y.flatten(), predict).copy(name='cost') error = MisclassificationRate().apply(y.flatten(), predict) #Little trick to plot the error rate in two different plots (We can't use two time the same data in the plot for a unknow reason) error_rate = error.copy(name='error_rate') error_rate2 = error.copy(name='error_rate2') cg = ComputationGraph([cost, error_rate])
def create_network(inputs=None, batch=batch_size):
    """Build the Gated PixelCNN graph and return (cost, cost_bits_dim).

    Uses the module-level configuration globals (dataset, img_dim, h,
    n_channel, n_layer, third_layer, MODE).  Note that `batch=batch_size`
    binds the global's value at definition time.

    Parameters
    ----------
    inputs : theano tensor4, optional
        Input images; a fresh 'features' tensor4 is created when omitted.
    batch : int
        Batch size passed to the bricks.

    Returns
    -------
    (cost, cost_bits_dim)
        Negative log-likelihood in nats, and the per-dimension cost in bits.
    """
    if inputs is None:
        inputs = T.tensor4('features')
    x = T.cast(inputs, 'float32')
    # Scale to [0, 1] unless the data is already binary.
    x = x / 255. if dataset != 'binarized_mnist' else x

    # GatedPixelCNN: first layer splits into vertical/horizontal stacks.
    gated = GatedPixelCNN(name='gated_layer_0',
                          filter_size=7,
                          image_size=(img_dim, img_dim),
                          num_filters=h * n_channel,
                          num_channels=n_channel,
                          batch_size=batch,
                          weights_init=IsotropicGaussian(std=0.02, mean=0),
                          biases_init=Constant(0.02),
                          res=False)
    gated.initialize()
    x_v, x_h = gated.apply(x, x)

    # Residual gated layers on top of the two stacks.
    for i in range(n_layer):
        gated = GatedPixelCNN(name='gated_layer_{}'.format(i + 1),
                              filter_size=3,
                              image_size=(img_dim, img_dim),
                              num_channels=h * n_channel,
                              batch_size=batch,
                              weights_init=IsotropicGaussian(std=0.02,
                                                             mean=0),
                              biases_init=Constant(0.02),
                              res=True)
        gated.initialize()
        x_v, x_h = gated.apply(x_v, x_h)

    # Output head: two masked ('B') conv layers with ReLUs.
    conv_list = []
    conv_list.extend([
        Rectifier(),
        ConvolutionalNoFlip((1, 1),
                            h * n_channel,
                            mask_type='B',
                            name='1x1_conv_1')
    ])
    #conv_list.extend([Rectifier(), ConvolutionalNoFlip((1,1), h*n_channel, mask='B', name='1x1_conv_2')])
    conv_list.extend([
        Rectifier(),
        ConvolutionalNoFlip(*third_layer, mask_type='B', name='output_layer')
    ])

    sequence = ConvolutionalSequence(conv_list,
                                     num_channels=h * n_channel,
                                     batch_size=batch,
                                     image_size=(img_dim, img_dim),
                                     border_mode='half',
                                     weights_init=IsotropicGaussian(std=0.02,
                                                                    mean=0),
                                     biases_init=Constant(0.02),
                                     tied_biases=False)
    sequence.initialize()
    # Only the horizontal stack feeds the output head.
    x = sequence.apply(x_h)

    if MODE == '256ary':
        # 256-way softmax per pixel and channel.
        x = x.reshape(
            (-1, 256, n_channel, img_dim, img_dim)).dimshuffle(0, 2, 3, 4, 1)
        x = x.reshape((-1, 256))
        x_hat = Softmax().apply(x)
        inp = T.cast(inputs, 'int64').flatten()
        cost = CategoricalCrossEntropy().apply(inp, x_hat) * img_dim * img_dim
        cost_bits_dim = categorical_crossentropy(log_softmax(x), inp)
    else:
        # Bernoulli output for binarized data.
        x_hat = Logistic().apply(x)
        cost = BinaryCrossEntropy().apply(inputs, x_hat) * img_dim * img_dim
        #cost = T.nnet.binary_crossentropy(x_hat, inputs)
        #cost = cost.sum() / inputs.shape[0]
        cost_bits_dim = -(inputs * T.log2(x_hat) + (1.0 -
                          inputs) * T.log2(1.0 - x_hat)).mean()

    cost_bits_dim.name = "nnl_bits_dim"
    cost.name = 'loglikelihood_nat'

    return cost, cost_bits_dim
def run_experiment():
    """Check per-example squared gradient norms for a one-layer convnet.

    Builds a tiny conv layer on constant data with constant filters, then
    compares the per-example sum of squared bias-gradient norms computed
    analytically (project helper get_sum_square_norm_gradients_conv_
    transformations) against per-example calls to the compiled gradient
    function, printing the discrepancies.
    """
    np.random.seed(42)

    X = tensor.tensor4('features')
    nbr_channels = 3
    image_shape = (5, 5)

    conv_layers = [
        ConvolutionalLayer(
            filter_size=(2,2), num_filters=10,
            activation=Rectifier().apply, border_mode='valid',
            pooling_size=(1,1),
            weights_init=Uniform(width=0.1),
            #biases_init=Uniform(width=0.01),
            biases_init=Constant(0.0),
            name='conv0')]

    conv_sequence = ConvolutionalSequence(
        conv_layers, num_channels=nbr_channels, image_size=image_shape)
    #conv_sequence.push_allocation_config()
    conv_sequence.initialize()

    flattener = Flattener()
    conv_output = conv_sequence.apply(X)
    y_hat = flattener.apply(conv_output)

    # Whatever. Not important since we're not going to actually train anything.
    cost = tensor.sqr(y_hat).sum()

    #L_grads_method_02 = [tensor.grad(cost, v) for v in VariableFilter(roles=[FILTER, BIAS])(ComputationGraph([y_hat]).variables)]
    # Gradients w.r.t. biases only.
    L_grads_method_02 = [
        tensor.grad(cost, v)
        for v in VariableFilter(roles=[BIAS])(ComputationGraph([y_hat]).variables)]

    # works on the sum of the gradients in a mini-batch
    sum_square_norm_gradients_method_02 = sum(
        [tensor.sqr(g).sum() for g in L_grads_method_02])

    D_by_layer = get_conv_layers_transformation_roles(
        ComputationGraph(conv_output))
    individual_sum_square_norm_gradients_method_00 = get_sum_square_norm_gradients_conv_transformations(
        D_by_layer, cost)

    # why does this thing depend on N again ?
    # I don't think I've used a cost that divides by N.

    N = 2
    Xtrain = np.random.randn(N, nbr_channels, image_shape[0],
                             image_shape[1]).astype(np.float32)
    #Xtrain[1:,:,:,:] = 0.0
    # Use constant inputs so the expected gradients are easy to reason about.
    Xtrain[:,:,:,:] = 1.0

    # Force the conv filters to a known constant value too.
    convolution_filter_variable = VariableFilter(roles=[FILTER])(
        ComputationGraph([y_hat]).variables)[0]
    convolution_filter_variable_value = convolution_filter_variable.get_value()
    convolution_filter_variable_value[:,:,:,:] = 1.0
    #convolution_filter_variable_value[0,0,:,:] = 1.0
    convolution_filter_variable.set_value(convolution_filter_variable_value)

    f = theano.function([X],
                        [cost,
                         individual_sum_square_norm_gradients_method_00,
                         sum_square_norm_gradients_method_02])

    # Whole mini-batch in one call ...
    [c, v0, gs2] = f(Xtrain)

    #print "[c, v0, gs2]"

    # ... versus one call per example, accumulated.
    L_c, L_v0, L_gs2 = ([], [], [])
    for n in range(N):
        [nc, nv0, ngs2] = f(Xtrain[n,:, :, :].reshape(
            (1, Xtrain.shape[1], Xtrain.shape[2], Xtrain.shape[3])))
        L_c.append(nc)
        L_v0.append(nv0)
        L_gs2.append(ngs2)

    print "Cost for whole mini-batch in single shot : %f." % c
    print "Cost for whole mini-batch accumulated : %f." % sum(L_c)
    print ""
    print "Square-norm of all gradients for each data point in single shot :"
    print v0.reshape((1,-1))
    print "Square-norm of all gradients for each data point iteratively :"
    print np.array(L_gs2).reshape((1,-1))
    print ""
    print "Difference max abs : %f." % np.max(np.abs(v0 - np.array(L_gs2)))
    print ""
    print "Ratios : "
    print np.array(L_gs2).reshape((1,-1)) / v0.reshape((1,-1))
num_filters=192, border_mode='half', name='conv_2')) layers.append(Rectifier()) layers.append(MaxPooling((3, 3), step=(2, 2), padding=(1, 1), name='pool_2')) #Create the sequence conv_sequence = ConvolutionalSequence(layers, num_channels=3, image_size=(None, None), weights_init=Orthogonal(), use_bias=False, name='convSeq') #Initialize the convnet #Output the first result out = conv_sequence.apply(x) ###############SECOND STAGE##################### out2 = inception((None, None), 192, 64, 96, 128, 16, 32, 32, out, 10) out3 = inception((None, None), 256, 128, 128, 192, 32, 96, 64, out2, 20) out31 = MaxPooling((2, 2), name='poolLow').apply(out3) out4 = inception((None, None), 480, 192, 96, 208, 16, 48, 64, out31, 30) out5 = inception((None, None), 512, 160, 112, 224, 24, 64, 64, out4, 40) out6 = inception((None, None), 512, 128, 128, 256, 24, 64, 64, out5, 50) out7 = inception((None, None), 512, 112, 144, 288, 32, 64, 64, out6, 60) out8 = inception((None, None), 528, 256, 160, 320, 32, 128, 128, out7, 70) out81 = MaxPooling((20, 20), name='poolLow1').apply(out8) out9 = inception((None, None), 832, 256, 160, 320, 32, 128, 128, out81, 80) out10 = inception((None, None), 832, 384, 192, 384, 48, 128, 128, out9, 90) out91 = AveragePooling((5, 5), name='poolLow2').apply(out10)