Code Example #1
File: test_bn.py Project: ck624/dependency-parser
def test_batch_normalization_inside_convolutional_sequence():
    """Test that BN bricks work in ConvolutionalSequences."""
    conv_seq = ConvolutionalSequence(
        [Convolutional(filter_size=(3, 3), num_filters=4),
         BatchNormalization(broadcastable=(False, True, True)),
         AveragePooling(pooling_size=(2, 2)),
         BatchNormalization(broadcastable=(False, False, False)),
         MaxPooling(pooling_size=(2, 2), step=(1, 1))],
        weights_init=Constant(1.),
        biases_init=Constant(2.),
        image_size=(10, 8), num_channels=9)

    conv_seq_no_bn = ConvolutionalSequence(
        [Convolutional(filter_size=(3, 3), num_filters=4),
         AveragePooling(pooling_size=(2, 2)),
         MaxPooling(pooling_size=(2, 2), step=(1, 1))],
        weights_init=Constant(1.),
        biases_init=Constant(2.),
        image_size=(10, 8), num_channels=9)

    conv_seq.initialize()
    conv_seq_no_bn.initialize()
    rng = numpy.random.RandomState((2015, 12, 17))
    input_ = random_unif(rng, (2, 9, 10, 8))

    x = theano.tensor.tensor4()
    ybn = conv_seq.apply(x)
    y = conv_seq_no_bn.apply(x)
    yield (assert_equal, ybn.eval({x: input_}), y.eval({x: input_}))

    std = conv_seq.children[-2].population_stdev
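    # Tripling population_stdev should scale the BN branch's output by 1/3 at
    # inference; the assertion above showed BN is the identity at initialization.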
    std.set_value(3 * std.get_value(borrow=True))
    yield (assert_equal, ybn.eval({x: input_}), y.eval({x: input_}) / 3.)
Code Example #2
class EncoderMapping(Initializable):
    """
    Parameters
    ----------
    layers : :class:`list`
        List of bricks.
    num_channels : :class:`int`
        Number of input channels.
    image_size : :class:`tuple`
        Image size.
    n_emb : :class:`int`
        Dimensionality of the embedding.
    use_bias : :class:`bool`
        Whether the convolutional bricks use biases.
    """
    def __init__(self,
                 layers,
                 num_channels,
                 image_size,
                 n_emb,
                 use_bias=False,
                 **kwargs):
        self.layers = layers
        self.num_channels = num_channels
        self.image_size = image_size

        self.pre_encoder = ConvolutionalSequence(layers=layers[:-1],
                                                 num_channels=num_channels,
                                                 image_size=image_size,
                                                 use_bias=use_bias,
                                                 name='encoder_conv_mapping')
        self.pre_encoder.allocate()
        n_channels = n_emb + self.pre_encoder.get_dim('output')[0]
        self.post_encoder = ConvolutionalSequence(layers=[layers[-1]],
                                                  num_channels=n_channels,
                                                  image_size=(1, 1),
                                                  use_bias=use_bias)
        children = [self.pre_encoder, self.post_encoder]
        kwargs.setdefault('children', []).extend(children)
        super(EncoderMapping, self).__init__(**kwargs)

    @application(inputs=['x', 'y'], outputs=['output'])
    def apply(self, x, y):
        "Returns mu and logsigma"
        # Getting embedding
        pre_z = self.pre_encoder.apply(x)
        # Concatenating
        pre_z_embed_y = tensor.concatenate([pre_z, y], axis=1)
        # propagating through last layer
        return self.post_encoder.apply(pre_z_embed_y)
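
A minimal usage sketch for the brick above (the layer list, sizes, and brick names are hypothetical; standard Blocks/Theano imports are assumed). Because the post-encoder is declared with image_size=(1, 1), the pre-encoder layers must reduce the input to a 1x1 feature map before y is concatenated along the channel axis:

from theano import tensor
from blocks.bricks import Rectifier
from blocks.bricks.conv import Convolutional
from blocks.initialization import Constant, IsotropicGaussian

# Hypothetical layers: all but the last brick form the pre-encoder; the
# final 1x1 convolution consumes the concatenated channels.
layers = [Convolutional(filter_size=(4, 4), num_filters=32, name='conv0'),
          Rectifier(),
          Convolutional(filter_size=(1, 1), num_filters=2 * 16,  # mu and logsigma for a 16-dim latent
                        name='conv_out')]
encoder = EncoderMapping(layers=layers, num_channels=3, image_size=(4, 4),
                         n_emb=10,
                         weights_init=IsotropicGaussian(0.01),
                         biases_init=Constant(0.))
encoder.initialize()

x = tensor.tensor4('x')  # (batch, 3, 4, 4) input
y = tensor.tensor4('y')  # (batch, 10, 1, 1) conditioning embedding
mu_logsigma = encoder.apply(x, y)  # pre_z: (batch, 32, 1, 1); concat: 42 channels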
Code Example #3
def test_convolutional_sequence():
    x = tensor.tensor4('x')
    num_channels = 4
    pooling_size = 3
    batch_size = 5
    activation = Rectifier().apply

    conv = ConvolutionalLayer(activation, (3, 3),
                              5, (pooling_size, pooling_size),
                              weights_init=Constant(1.),
                              biases_init=Constant(5.))
    conv2 = ConvolutionalActivation(activation, (2, 2),
                                    4,
                                    weights_init=Constant(1.))

    seq = ConvolutionalSequence([conv, conv2],
                                num_channels,
                                image_size=(17, 13))
    seq.push_allocation_config()
    assert conv.num_channels == 4
    assert conv2.num_channels == 5
    conv2.convolution.use_bias = False
    y = seq.apply(x)
    seq.initialize()
    func = function([x], y)

    x_val = numpy.ones((batch_size, 4, 17, 13), dtype=theano.config.floatX)
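    # All-ones input and weights: each 3x3 conv output over 4 channels is
    # 9 * 4 + bias 5 = 41; the bias-free 2x2 conv over 5 channels then yields
    # 41 * (2 * 2) * 5 per element, which is the expression below.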
    y_val = (numpy.ones((batch_size, 4, 4, 3)) * (9 * 4 + 5) * 4 * 5)
    assert_allclose(func(x_val), y_val)
Code Example #4
File: test_conv.py Project: xuanhan863/blocks
def test_convolutional_sequence():
    x = tensor.tensor4('x')
    num_channels = 4
    pooling_size = 3
    batch_size = 5
    activation = Rectifier().apply

    conv = ConvolutionalLayer(activation, (3, 3), 5,
                              (pooling_size, pooling_size),
                              weights_init=Constant(1.),
                              biases_init=Constant(5.))
    conv2 = ConvolutionalActivation(activation, (2, 2), 4,
                                    weights_init=Constant(1.))

    seq = ConvolutionalSequence([conv, conv2], num_channels,
                                image_size=(17, 13))
    seq.push_allocation_config()
    assert conv.num_channels == 4
    assert conv2.num_channels == 5
    conv2.convolution.use_bias = False
    y = seq.apply(x)
    seq.initialize()
    func = function([x], y)

    x_val = numpy.ones((batch_size, 4, 17, 13), dtype=theano.config.floatX)
    y_val = (numpy.ones((batch_size, 4, 4, 3)) *
             (9 * 4 + 5) * 4 * 5)
    assert_allclose(func(x_val), y_val)
Code Example #5
    def build_conv_layers(self, image=None):

        if image is None:
            image = T.ftensor4('spectrogram')

        conv_list = []
        for layer in range(self.layers):
            layer_param = self.params[layer]
            conv_layer = Convolutional(layer_param[0], layer_param[1],
                                       layer_param[2])
            pool_layer = MaxPooling(layer_param[3])

            conv_layer.name = "convolution" + str(layer)
            pool_layer.name = "maxpooling" + str(layer)

            conv_list.append(conv_layer)
            conv_list.append(pool_layer)
            conv_list.append(Rectifier())

        conv_seq = ConvolutionalSequence(conv_list,
                                         self.params[0][2],
                                         image_size=self.image_size,
                                         weights_init=IsotropicGaussian(
                                             std=0.5, mean=0),
                                         biases_init=Constant(0))

        conv_seq._push_allocation_config()
        conv_seq.initialize()
        out = conv_seq.apply(image)

        return out, conv_seq.get_dim('output')
Code Example #6
File: test_conv.py Project: SwordYork/blocks
def test_convolutional_sequence_with_no_input_size():
    # suppose x is outputted by some RNN
    x = tensor.tensor4('x')
    filter_size = (1, 1)
    num_filters = 2
    num_channels = 1
    pooling_size = (1, 1)
    conv = Convolutional(filter_size, num_filters, tied_biases=False,
                         weights_init=Constant(1.), biases_init=Constant(1.))
    act = Rectifier()
    pool = MaxPooling(pooling_size)

    bad_seq = ConvolutionalSequence([conv, act, pool], num_channels,
                                    tied_biases=False)
    assert_raises_regexp(ValueError, r'Cannot infer bias size \S+',
                         bad_seq.initialize)
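    # With tied_biases=False every output position has its own bias, so the
    # bias shape depends on the output image size and cannot be inferred
    # without image_size; tied biases (one per filter) need no image size.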

    seq = ConvolutionalSequence([conv, act, pool], num_channels,
                                tied_biases=True)
    try:
        seq.initialize()
        out = seq.apply(x)
    except TypeError:
        assert False, "This should have succeeded"

    assert out.ndim == 4
Code Example #7
File: test_conv.py Project: violet-zct/blocks
def test_convolutional_sequence_with_no_input_size():
    # suppose x is outputted by some RNN
    x = tensor.tensor4('x')
    filter_size = (1, 1)
    num_filters = 2
    num_channels = 1
    pooling_size = (1, 1)
    conv = Convolutional(filter_size,
                         num_filters,
                         tied_biases=False,
                         weights_init=Constant(1.),
                         biases_init=Constant(1.))
    act = Rectifier()
    pool = MaxPooling(pooling_size)

    bad_seq = ConvolutionalSequence([conv, act, pool],
                                    num_channels,
                                    tied_biases=False)
    assert_raises_regexp(ValueError, r'Cannot infer bias size \S+',
                         bad_seq.initialize)

    seq = ConvolutionalSequence([conv, act, pool],
                                num_channels,
                                tied_biases=True)
    try:
        seq.initialize()
        out = seq.apply(x)
    except TypeError:
        assert False, "This should have succeeded"

    assert out.ndim == 4
Code Example #8
File: convolution.py Project: olimastro/ift6266
    def build_conv_layers(self, image=None):

        if image is None:
            image = T.ftensor4('spectrogram')

        conv_list = []
        for layer in range(self.layers):
            layer_param = self.params[layer]
            conv_layer = Convolutional(layer_param[0], layer_param[1], layer_param[2])
            pool_layer = MaxPooling(layer_param[3])

            conv_layer.name = "convolution"+str(layer)
            pool_layer.name = "maxpooling"+str(layer)

            conv_list.append(conv_layer)
            conv_list.append(pool_layer)
            conv_list.append(Rectifier())

        conv_seq = ConvolutionalSequence(
            conv_list,
            self.params[0][2],
            image_size=self.image_size,
            weights_init=IsotropicGaussian(std=0.5, mean=0),
            biases_init=Constant(0))

        conv_seq._push_allocation_config()
        conv_seq.initialize()
        out = conv_seq.apply(image)

        return out, conv_seq.get_dim('output')
Code Example #9
class Decoder(Initializable):
    def __init__(self,
                 layers,
                 num_channels,
                 image_size,
                 use_bias=False,
                 **kwargs):
        self.layers = layers
        self.num_channels = num_channels
        self.image_size = image_size

        self.mapping = ConvolutionalSequence(layers=layers,
                                             num_channels=num_channels,
                                             image_size=image_size,
                                             use_bias=use_bias,
                                             name='decoder_mapping')
        children = [self.mapping]
        kwargs.setdefault('children', []).extend(children)
        super(Decoder, self).__init__(**kwargs)

    @application(inputs=['z', 'y'], outputs=['outputs'])
    def apply(self, z, y, application_call):
        # Concatenating conditional data with inputs
        z_y = tensor.concatenate([z, y], axis=1)
        return self.mapping.apply(z_y)
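
A hypothetical instantiation of the decoder above (sizes illustrative; imports as in the encoder sketch earlier). Since apply concatenates z and y along the channel axis, num_channels must equal their combined channel count:

# Hypothetical: a 16-channel latent z and a 10-channel conditioning y, both 1x1 spatial.
layers = [Convolutional(filter_size=(1, 1), num_filters=64, name='d0'),
          Rectifier()]
decoder = Decoder(layers=layers, num_channels=16 + 10, image_size=(1, 1),
                  weights_init=IsotropicGaussian(0.01))
decoder.initialize()
z = tensor.tensor4('z')  # (batch, 16, 1, 1)
y = tensor.tensor4('y')  # (batch, 10, 1, 1)
outputs = decoder.apply(z, y)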
Code Example #10
File: googleNet.py Project: bordesf/IFT6266
def inception(image_shape, num_input, conv1, conv2, conv3, conv4, conv5, conv6, out, i):
    layers1 = []
    layers2 = []
    layers3 = []
    layers4 = []
    layers1.append(Convolutional(filter_size=(1,1), num_channels=num_input, num_filters=conv1, image_size=image_shape, border_mode='half', name='conv_{}'.format(i)))
    layers1.append(BatchNormalization(name='batch_{}'.format(i)))
    layers1.append(Rectifier())
    conv_sequence1 = ConvolutionalSequence(layers1, num_channels=num_input, image_size=image_shape, weights_init=Orthogonal(), use_bias=False, name='convSeq_{}'.format(i))
    conv_sequence1.initialize()
    out1 = conv_sequence1.apply(out)
    i = i + 1

    layers2.append(Convolutional(filter_size=(1,1), num_channels=num_input, num_filters=conv2, image_size=image_shape, border_mode='half', name='conv_{}'.format(i)))
    layers2.append(BatchNormalization(name='batch_{}'.format(i)))
    layers2.append(Rectifier())
    i = i + 1
    layers2.append(Convolutional(filter_size=(3,3), num_channels=conv2, num_filters=conv3, image_size=image_shape, border_mode='half', name='conv_{}'.format(i)))
    layers2.append(BatchNormalization(name='batch_{}'.format(i)))
    layers2.append(Rectifier())
    conv_sequence2 = ConvolutionalSequence(layers2, num_channels=num_input, image_size=image_shape, weights_init=Orthogonal(), use_bias=False, name='convSeq_{}'.format(i))
    conv_sequence2.initialize()
    out2 = conv_sequence2.apply(out)
    i = i + 1

    layers3.append(Convolutional(filter_size=(1,1), num_channels=num_input, num_filters=conv4, image_size=image_shape, border_mode='half', name='conv_{}'.format(i)))
    layers3.append(BatchNormalization(name='batch_{}'.format(i)))
    layers3.append(Rectifier())
    i = i + 1
    layers3.append(Convolutional(filter_size=(5,5), num_channels=conv4, num_filters=conv5, image_size=image_shape, border_mode='half', name='conv_{}'.format(i)))
    layers3.append(BatchNormalization(name='batch_{}'.format(i)))
    layers3.append(Rectifier())
    conv_sequence3 = ConvolutionalSequence(layers3, num_channels=num_input, image_size=image_shape, weights_init=Orthogonal(), use_bias=False, name='convSeq_{}'.format(i))
    conv_sequence3.initialize()
    out3 = conv_sequence3.apply(out)
    i = i + 1

    layers4.append(MaxPooling((3,3), step=(1,1), padding=(1,1), name='pool_{}'.format(i)))
    layers4.append(Convolutional(filter_size=(1,1), num_channels=num_input, num_filters=conv6, image_size=image_shape, border_mode='half', name='conv_{}'.format(i)))
    layers4.append(BatchNormalization(name='batch_{}'.format(i)))
    layers4.append(Rectifier())
    i = i + 1
    conv_sequence4 = ConvolutionalSequence(layers4, num_channels=num_input, image_size=image_shape, weights_init=Orthogonal(), use_bias=False, name='convSeq_{}'.format(i))
    conv_sequence4.initialize()
    out4 = conv_sequence4.apply(out)
    #Merge
    return T.concatenate([out1, out2, out3, out4], axis=1)
Code Example #11
File: test_conv.py Project: SwordYork/blocks
def test_convolutional_sequence_with_raw_activation():
    seq = ConvolutionalSequence([Rectifier()], num_channels=4,
                                image_size=(20, 14))
    input_ = (((numpy.arange(2 * 4 * 20 * 14)
                .reshape((2, 4, 20, 14)) % 2) * 2 - 1)
              .astype(theano.config.floatX))
    expected_ = input_ * (input_ > 0)
    x = theano.tensor.tensor4()
    assert_allclose(seq.apply(x).eval({x: input_}), expected_)
Code Example #12
File: test_conv.py Project: violet-zct/blocks
def test_convolutional_sequence_with_raw_activation():
    seq = ConvolutionalSequence([Rectifier()],
                                num_channels=4,
                                image_size=(20, 14))
    input_ = (((numpy.arange(2 * 4 * 20 * 14).reshape(
        (2, 4, 20, 14)) % 2) * 2 - 1).astype(theano.config.floatX))
    expected_ = input_ * (input_ > 0)
    x = theano.tensor.tensor4()
    assert_allclose(seq.apply(x).eval({x: input_}), expected_)
Code Example #13
class EncoderMapping(Initializable):
    """
    Parameters
    ----------
    layers : :class:`list`
        List of bricks.
    num_channels : :class:`int`
        Number of input channels.
    image_size : :class:`tuple`
        Image size.
    n_emb : :class:`int`
        Dimensionality of the embedding.
    use_bias : :class:`bool`
        Whether the convolutional bricks use biases.
    """
    def __init__(self, layers, num_channels, image_size, n_emb, use_bias=False, **kwargs):
        self.layers = layers
        self.num_channels = num_channels
        self.image_size = image_size

        self.pre_encoder = ConvolutionalSequence(layers=layers[:-1],
                                                 num_channels=num_channels,
                                                 image_size=image_size,
                                                 use_bias=use_bias,
                                                 name='encoder_conv_mapping')
        self.pre_encoder.allocate()
        n_channels = n_emb + self.pre_encoder.get_dim('output')[0]
        self.post_encoder = ConvolutionalSequence(layers=[layers[-1]],
                                                  num_channels=n_channels,
                                                  image_size=(1, 1),
                                                  use_bias=use_bias)
        children = [self.pre_encoder, self.post_encoder]
        kwargs.setdefault('children', []).extend(children)
        super(EncoderMapping, self).__init__(**kwargs)

    @application(inputs=['x', 'y'], outputs=['output'])
    def apply(self, x, y):
        "Returns mu and logsigma"
        # Getting embedding
        pre_z = self.pre_encoder.apply(x)
        # Concatenating
        pre_z_embed_y = tensor.concatenate([pre_z, y], axis=1)
        # propagating through last layer
        return self.post_encoder.apply(pre_z_embed_y)
Code Example #14
def test_pooling_works_in_convolutional_sequence():
    x = tensor.tensor4('x')
    brick = ConvolutionalSequence([AveragePooling((2, 2), step=(2, 2)),
                                   MaxPooling((4, 4), step=(2, 2),
                                              ignore_border=True)],
                                  image_size=(16, 32), num_channels=3)
    brick.allocate()
    y = brick.apply(x)
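    # Shape check: (16, 32) -> AveragePooling (2, 2), step (2, 2) -> (8, 16)
    # -> MaxPooling (4, 4), step (2, 2), ignore_border -> (3, 7)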
    out = y.eval({x: numpy.empty((2, 3, 16, 32), dtype=theano.config.floatX)})
    assert out.shape == (2, 3, 3, 7)
Code Example #15
def test_fully_layer():
	batch_size=2
	x = T.tensor4();
	y = T.ivector()
	V = 200
	layer_conv = Convolutional(filter_size=(5,5),num_filters=V,
				name="toto",
				weights_init=IsotropicGaussian(0.01),
				biases_init=Constant(0.0))
	# try with no bias
	activation = Rectifier()
	pool = MaxPooling(pooling_size=(2,2))

	convnet = ConvolutionalSequence([layer_conv, activation, pool], num_channels=15,
					image_size=(10,10),
					name="conv_section")
	convnet.push_allocation_config()
	convnet.initialize()
	output=convnet.apply(x)
	batch_size=output.shape[0]
	output_dim=np.prod(convnet.get_dim('output'))
	result_conv = output.reshape((batch_size, output_dim))
	mlp=MLP(activations=[Rectifier().apply], dims=[output_dim, 10],
				weights_init=IsotropicGaussian(0.01),
				biases_init=Constant(0.0))
	mlp.initialize()
	output=mlp.apply(result_conv)
	cost = T.mean(Softmax().categorical_cross_entropy(y.flatten(), output))
	cg = ComputationGraph(cost)
	W = VariableFilter(roles=[WEIGHT])(cg.variables)
	B = VariableFilter(roles=[BIAS])(cg.variables)
	W = W[0]; b = B[0]

	inputs_fully = VariableFilter(roles=[INPUT], bricks=[Linear])(cg)
	outputs_fully = VariableFilter(roles=[OUTPUT], bricks=[Linear])(cg)
	var_input=inputs_fully[0]
	var_output=outputs_fully[0]
	
	[d_W,d_S,d_b] = T.grad(cost, [W, var_output, b])

	d_b = d_b.dimshuffle(('x',0))
	d_p = T.concatenate([d_W, d_b], axis=0)
	x_value = 1e3*np.random.ranf((2,15, 10, 10))
	f = theano.function([x,y], [var_input, d_S, d_p], allow_input_downcast=True, on_unused_input='ignore')
	A, B, C= f(x_value, [5, 0])
	A = np.concatenate([A, np.ones((2,1))], axis=1)
	print 'A', A.shape
	print 'B', B.shape
	print 'C', C.shape

	print lin.norm(C - np.dot(np.transpose(A), B), 'fro')

	return
	
	"""
Code Example #16
def test_convolutional_sequence_use_bias():
    cnn = ConvolutionalSequence(
        sum([[Convolutional(filter_size=(1, 1), num_filters=1), Rectifier()]
             for _ in range(3)], []),
        num_channels=1, image_size=(1, 1),
        use_bias=False)
    cnn.allocate()
    x = tensor.tensor4()
    y = cnn.apply(x)
    params = ComputationGraph(y).parameters
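    # With use_bias=False no biases are allocated, so only the three
    # convolution kernels W remain as parameters.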
    assert len(params) == 3 and all(param.name == 'W' for param in params)
Code Example #17
File: test_conv.py Project: piergiaj/blocks
def test_convolutional_sequence_use_bias():
    cnn = ConvolutionalSequence(
        [ConvolutionalActivation(activation=Rectifier().apply, filter_size=(1, 1), num_filters=1) for _ in range(3)],
        num_channels=1,
        image_size=(1, 1),
        use_bias=False,
    )
    cnn.allocate()
    x = tensor.tensor4()
    y = cnn.apply(x)
    params = ComputationGraph(y).parameters
    assert len(params) == 3 and all(param.name == "W" for param in params)
Code Example #18
def test_convolutional_sequence_with_convolutions_raw_activation():
    seq = ConvolutionalSequence(
        [Convolutional(filter_size=(3, 3), num_filters=4),
         Rectifier(),
         Convolutional(filter_size=(5, 5), num_filters=3, step=(2, 2)),
         Tanh()],
        num_channels=2,
        image_size=(21, 39))
    seq.allocate()
    x = theano.tensor.tensor4()
    out = seq.apply(x).eval({x: numpy.ones((10, 2, 21, 39),
                                           dtype=theano.config.floatX)})
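    # Shape check: (21, 39) --3x3, valid--> (19, 37) --5x5, step (2, 2)--> (8, 17)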
    assert out.shape == (10, 3, 8, 17)
Code Example #19
def conv_block(input_img,
               n_filter,
               filter_size,
               input_featuremap_size,
               ordering=''):

    # Scale found in Torch's SpatialConvolution (fan-in based)
    std0 = 2. / (filter_size[0] * filter_size[1] *
                 input_featuremap_size[0])**.5
    std1 = 2. / (input_featuremap_size[0])**.5

    layers = []
    layers.append(
        Convolutional(filter_size=filter_size,
                      num_filters=n_filter,
                      border_mode='half',
                      name='conv%s_1' % (ordering, ),
                      use_bias=True,
                      weights_init=Uniform(width=std0)))
    layers.append(BatchNormalization(name='bn%s_1' % (ordering, )))
    layers.append(LeakyReLU())
    layers.append(
        Convolutional(filter_size=filter_size,
                      num_filters=n_filter,
                      border_mode='half',
                      name='conv%s_2' % (ordering, ),
                      use_bias=True,
                      weights_init=Uniform(width=std0)))
    layers.append(BatchNormalization(name='bn%s_2' % (ordering, )))
    layers.append(LeakyReLU())
    layers.append(
        Convolutional(filter_size=(1, 1),
                      num_filters=n_filter,
                      border_mode='valid',
                      name='conv%s_3b' % (ordering, ),
                      use_bias=True,
                      weights_init=Uniform(width=std1)))
    layers.append(BatchNormalization(name='bn%s_3' % (ordering, )))
    layers.append(LeakyReLU())

    conv_sequence = ConvolutionalSequence(
        layers,
        num_channels=input_featuremap_size[0],
        image_size=(input_featuremap_size[1], input_featuremap_size[2]),
        biases_init=Uniform(width=.1),
        name='convsequence%s' % (ordering, ))

    conv_sequence.initialize()
    return conv_sequence.apply(input_img)
Code Example #20
def main():
    initial = numpy.random.normal(0, 0.1, (1, 1, 200, 200))
    x = theano.shared(initial)

    conv_layer = ConvolutionalLayer(
        Rectifier().apply,
        (16, 16),
        9,
        (4, 4),
        1
    )
    conv_layer2 = ConvolutionalLayer(
        Rectifier().apply,
        (7, 7),
        9,
        (2, 2),
        1
    )
    con_seq = ConvolutionalSequence([conv_layer], 1,
                                    image_size=(200, 200),
                                    weights_init=IsotropicGaussian(0.1),
                                    biases_init=Constant(0.)
                                    )

    con_seq.initialize()
    out = con_seq.apply(x)
    target_out = out[0, 0, 1, 1]

    grad = theano.grad(target_out - .1 * (x ** 2).sum(), x)
    updates = {x: x + 5e-1 * grad}
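    # Gradient ascent on one unit's activation; the L2 term keeps x bounded.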
    #x.set_value(numpy.ones((1, 1, 200, 200)))
    #print theano.function([], out)()

    make_step = theano.function([], target_out, updates=updates)

    for i in xrange(400):
        out_val = make_step()
        print i, out_val

    image = x.get_value()[0][0]
    image = (image - image.mean()) / image.std()
    image = numpy.array([image, image, image]).transpose(1, 2, 0)
    plt.imshow(numpy.cast['uint8'](image * 65. + 128.), interpolation='none')
    plt.show()
Code Example #21
class Decoder(Initializable):
    def __init__(self, layers, num_channels, image_size, use_bias=False, **kwargs):
        self.layers = layers
        self.num_channels = num_channels
        self.image_size = image_size

        self.mapping = ConvolutionalSequence(layers=layers,
                                             num_channels=num_channels,
                                             image_size=image_size,
                                             use_bias=use_bias,
                                             name='decoder_mapping')
        children = [self.mapping]
        kwargs.setdefault('children', []).extend(children)
        super(Decoder, self).__init__(**kwargs)

    @application(inputs=['z', 'y'], outputs=['outputs'])
    def apply(self, z, y, application_call):
        # Concatenating conditional data with inputs
        z_y = tensor.concatenate([z, y], axis=1)
        return self.mapping.apply(z_y)
Code Example #22
x = tensor.tensor4('image_features')
y = tensor.lmatrix('targets')

num_epochs = 1000
layers = []

###############FIRST STAGE#######################
#Create the convolutional layers
layers.append(Convolutional(filter_size=(7,7), step=(2,2), num_filters=96, border_mode='half', name='conv_0'))
layers.append(BatchNormalization(name='batch_0'))
layers.append(Rectifier())
layers.append(MaxPooling((3,3), step=(2,2), padding=(1,1), name='pool_0'))

convSeq = ConvolutionalSequence(layers, num_channels=3, image_size=(220,220), weights_init=Orthogonal(), use_bias=False, name='ConvSeq')
convSeq.initialize()
out = convSeq.apply(x)

#FIRE MODULES
out1 = Fire((55,55), 96, 16, 16, 16, out, 10)
out2 = Fire((55,55), 128, 16, 16, 16, out1, 25)
out3 = Fire((55,55), 128, 32, 32, 32, out2, 300)
out31 = MaxPooling((3,3), step=(2,2), padding=(1,1), name='poolLow').apply(out3)
out4 = Fire((28,28), 256, 32, 32, 32, out31, 45)
out5 = Fire((28,28), 256, 48, 48, 48, out4, 500)
out6 = Fire((28,28), 384, 48, 48, 48, out5, 65)
out7 = Fire((28,28), 384, 64, 64, 64, out6, 700)
out71 = MaxPooling((3,3), step=(2,2), padding=(1,1), name='poolLow2').apply(out7)
out8 = Fire((14,14), 512, 64, 64, 64, out71, 85)

#LAST LAYERS
conv_layers1 = list([Convolutional(filter_size=(1,1), num_filters=2, name='Convx2'), BatchNormalization(name='batch_vx2'), Rectifier(),
Code Example #23
File: googleNet.py Project: bordesf/IFT6266
layers.append(Convolutional(filter_size=(1,1), num_filters=64, border_mode='half', name='conv_1'))
layers.append(BatchNormalization(name='batch_1'))
layers.append(Rectifier())
layers.append(MaxPooling((3,3), step=(2,2), padding=(1,1), name='pool_1'))
layers.append(Convolutional(filter_size=(3,3), num_filters=192, border_mode='half', name='conv_2'))
layers.append(BatchNormalization(name='batch_2'))
layers.append(Rectifier())
layers.append(MaxPooling((3,3), step=(2,2), padding=(1,1), name='pool_2'))

#Create the sequence
conv_sequence = ConvolutionalSequence(layers, num_channels=3, image_size=(160,160), weights_init=Orthogonal(), use_bias=False, name='convSeq')
#Initialize the convnet
conv_sequence.initialize()
#Output the first result
out = conv_sequence.apply(x)

###############SECOND STAGE#####################
out2 = inception((20,20), 192, 64, 96, 128, 16, 32, 32, out, 10)
out3 = inception((20,20), 256, 128, 128, 192, 32, 96, 64, out2, 20)
out31 = MaxPooling((2,2), name='poolLow').apply(out3)

out4 = inception((10,10), 480, 192, 96, 208, 16, 48, 64, out31, 30)
out5 = inception((10,10), 512, 160, 112, 224, 24, 64, 64, out4, 40)
out6 = inception((10,10), 512, 128, 128, 256, 24, 64, 64, out5, 50)
out7 = inception((10,10), 512, 112, 144, 288, 32, 64, 64, out6, 60)
out8 = inception((10,10), 528, 256, 160, 320, 32, 128, 128, out7, 70)
out81 = MaxPooling((2,2), name='poolLow1').apply(out8)

out9 = inception((5,5), 832, 256, 160, 320, 32, 128, 128, out81, 80)
out10 = inception((5,5), 832, 384, 192, 384, 48, 128, 128, out9, 90)
Code Example #24
File: test2.py Project: jpilaul/IFT6266_project
pooling_sizes = [(2, 2)] * 2
activation = Logistic().apply
conv_layers = [
    b.ConvolutionalLayer(activation, filter_size, num_filters_, pooling_size, num_channels=3)
    for filter_size, num_filters_, pooling_size
    in zip(filter_sizes, num_filters, pooling_sizes)
]

convnet = ConvolutionalSequence(conv_layers, num_channels=3,
                                image_size=(32, 32),
                                weights_init=Uniform(0, 0.2),
                                biases_init=Constant(0.))


convnet.initialize()
conv_features = Flattener().apply(convnet.apply(X))


# MLP

mlp = MLP(activations=[Logistic(name='sigmoid_0'),
                       Softmax(name='softmax_1')],
          dims=[256, 256, 256, 2],
          weights_init=IsotropicGaussian(0.01), biases_init=Constant(0))
# REPL check of the brick hierarchy:
# >>> [child.name for child in mlp.children]
# ['linear_0', 'sigmoid_0', 'linear_1', 'softmax_1']
Y = mlp.apply(conv_features)
mlp.initialize()


# Setting up the cost function
from blocks.bricks.cost import CategoricalCrossEntropy
Code Example #25
def inception(image_shape, num_input, conv1, conv2, conv3, conv4, conv5, conv6,
              out, i):
    layers1 = []
    layers2 = []
    layers3 = []
    layers4 = []
    layers1.append(
        Convolutional(filter_size=(1, 1),
                      num_channels=num_input,
                      num_filters=conv1,
                      image_size=image_shape,
                      border_mode='half',
                      name='conv_{}'.format(i)))
    layers1.append(BatchNormalization(name='batch_{}'.format(i)))
    layers1.append(Rectifier())
    conv_sequence1 = ConvolutionalSequence(layers1,
                                           num_channels=num_input,
                                           image_size=image_shape,
                                           weights_init=Orthogonal(),
                                           use_bias=False,
                                           name='convSeq_{}'.format(i))
    conv_sequence1.initialize()
    out1 = conv_sequence1.apply(out)
    i = i + 1

    layers2.append(
        Convolutional(filter_size=(1, 1),
                      num_channels=num_input,
                      num_filters=conv2,
                      image_size=image_shape,
                      border_mode='half',
                      name='conv_{}'.format(i)))
    layers2.append(BatchNormalization(name='batch_{}'.format(i)))
    layers2.append(Rectifier())
    i = i + 1
    layers2.append(
        Convolutional(filter_size=(3, 3),
                      num_channels=conv2,
                      num_filters=conv3,
                      image_size=image_shape,
                      border_mode='half',
                      name='conv_{}'.format(i)))
    layers2.append(BatchNormalization(name='batch_{}'.format(i)))
    layers2.append(Rectifier())
    conv_sequence2 = ConvolutionalSequence(layers2,
                                           num_channels=num_input,
                                           image_size=image_shape,
                                           weights_init=Orthogonal(),
                                           use_bias=False,
                                           name='convSeq_{}'.format(i))
    conv_sequence2.initialize()
    out2 = conv_sequence2.apply(out)
    i = i + 1

    layers3.append(
        Convolutional(filter_size=(1, 1),
                      num_channels=num_input,
                      num_filters=conv4,
                      image_size=image_shape,
                      border_mode='half',
                      name='conv_{}'.format(i)))
    layers3.append(BatchNormalization(name='batch_{}'.format(i)))
    layers3.append(Rectifier())
    i = i + 1
    layers3.append(
        Convolutional(filter_size=(5, 5),
                      num_channels=conv4,
                      num_filters=conv5,
                      image_size=image_shape,
                      border_mode='half',
                      name='conv_{}'.format(i)))
    layers3.append(BatchNormalization(name='batch_{}'.format(i)))
    layers3.append(Rectifier())
    conv_sequence3 = ConvolutionalSequence(layers3,
                                           num_channels=num_input,
                                           image_size=image_shape,
                                           weights_init=Orthogonal(),
                                           use_bias=False,
                                           name='convSeq_{}'.format(i))
    conv_sequence3.initialize()
    out3 = conv_sequence3.apply(out)
    i = i + 1

    layers4.append(
        MaxPooling((3, 3),
                   step=(1, 1),
                   padding=(1, 1),
                   name='pool_{}'.format(i)))
    layers4.append(
        Convolutional(filter_size=(1, 1),
                      num_channels=num_input,
                      num_filters=conv6,
                      image_size=image_shape,
                      border_mode='half',
                      name='conv_{}'.format(i)))
    layers4.append(BatchNormalization(name='batch_{}'.format(i)))
    layers4.append(Rectifier())
    i = i + 1
    conv_sequence4 = ConvolutionalSequence(layers4,
                                           num_channels=num_input,
                                           image_size=image_shape,
                                           weights_init=Orthogonal(),
                                           use_bias=False,
                                           name='convSeq_{}'.format(i))
    conv_sequence4.initialize()
    out4 = conv_sequence4.apply(out)
    #Merge
    return T.concatenate([out1, out2, out3, out4], axis=1)
Code Example #26
File: firstnet.py Project: youralien/MLFun
def main():
    # # # # # # # # # # # 
    # Modeling Building #
    # # # # # # # # # # # 
    
    # ConvOp requires input be a 4D tensor
    x = tensor.tensor4("features")

    y = tensor.ivector("targets")

    # Convolutional Layers
    # ====================
    
    # "Improving neural networks by preventing co-adaptation of feature detectors"
    # conv_layers = [
    #     # ConvolutionalLayer(activiation, filter_size, num_filters, pooling_size, name)
    #       ConvolutionalLayer(Rectifier().apply, (5,5), 64, (2,2), border_mode='full', name='l1')
    #     , ConvolutionalLayer(Rectifier().apply, (5,5), 64, (2,2), border_mode='full', name='l2')
    #     , ConvolutionalLayer(Rectifier().apply, (5,5), 64, (2,2), border_mode='full', name='l3')
    #     ]

    # "VGGNet"
    conv_layers = [
          ConvolutionalActivation(Rectifier().apply, (3,3), 64, border_mode='full', name='l1')
        , ConvolutionalLayer(Rectifier().apply, (3,3), 64, (2,2), border_mode='full', name='l2')
        , ConvolutionalActivation(Rectifier().apply, (3,3), 128, border_mode='full', name='l3')
        , ConvolutionalLayer(Rectifier().apply, (3,3), 128, (2,2), border_mode='full', name='l4')
        , ConvolutionalActivation(Rectifier().apply, (3,3), 256, border_mode='full', name='l5')
        , ConvolutionalLayer(Rectifier().apply, (3,3), 256, (2,2), border_mode='full', name='l6')
        ]

    # Bake my own
    # conv_layers = [
    #     # ConvolutionalLayer(activiation, filter_size, num_filters, pooling_size, name)
    #       ConvolutionalLayer(Rectifier().apply, (5,5), 64, (2,2), border_mode='full', name='l1')
    #     , ConvolutionalLayer(Rectifier().apply, (3,3), 128, (2,2), border_mode='full', name='l2')
    #     , ConvolutionalActivation(Rectifier().apply, (3,3), 256, border_mode='full', name='l3')
    #     , ConvolutionalLayer(Rectifier().apply, (3,3), 256, (2,2), border_mode='full', name='l4')
    #     ]

    
    convnet = ConvolutionalSequence(
        conv_layers, num_channels=3, image_size=(32,32),
        weights_init=IsotropicGaussian(0.1),
        biases_init=Constant(0)
        )
    convnet.initialize()

    output_dim = np.prod(convnet.get_dim('output'))

    # Fully Connected Layers
    # ======================
    conv_features = convnet.apply(x)
    features = Flattener().apply(conv_features)

    mlp = MLP(  activations=[Rectifier()]*2+[None]
              , dims=[output_dim, 256, 256, 10]
              , weights_init=IsotropicGaussian(0.01)
              , biases_init=Constant(0)
        )
    mlp.initialize()

    y_hat = mlp.apply(features)
    # print y_hat.shape.eval({x: np.zeros((1, 3, 32, 32), dtype=theano.config.floatX)})

    # Numerically Stable Softmax
    cost = Softmax().categorical_cross_entropy(y, y_hat)
    error_rate = MisclassificationRate().apply(y, y_hat)

    cg = ComputationGraph(cost)

    weights = VariableFilter(roles=[FILTER, WEIGHT])(cg.variables)
    l2_regularization = 0.005 * sum((W**2).sum() for W in weights)

    cost = cost + l2_regularization
    cost.name = 'cost_with_regularization'

    # Print sizes to check
    print("Representation sizes:")
    for layer in convnet.layers:
        print(layer.get_dim('input_'))

    # # # # # # # # # # # 
    # Modeling Training #
    # # # # # # # # # # # 

    # Figure out data source
    train = CIFAR10("train")
    test = CIFAR10("test")

    # Load Data Using Fuel
    train_stream = DataStream.default_stream(
          dataset=train
        , iteration_scheme=SequentialScheme(train.num_examples, batch_size=128))
    test_stream = DataStream.default_stream(
          dataset=test
        , iteration_scheme=SequentialScheme(test.num_examples, batch_size=1024))

    # Train
    algorithm = GradientDescent(
          cost=cost
        , params=cg.parameters
        , step_rule=Adam(learning_rate=0.0005)
        )


    main_loop = MainLoop(
          model=Model(cost)
        , data_stream=train_stream
        , algorithm=algorithm
        , extensions=[
              TrainingDataMonitoring(
                  [cost, error_rate]
                , prefix='train'
                , after_epoch=True)
            , DataStreamMonitoring(
                  [cost, error_rate]
                , test_stream,
                  prefix='test')
            , ExperimentSaver(dest_directory='...', src_directory='.')
            , Printing()
            , ProgressBar()
            ]
        )
    main_loop.run()
Code Example #27
    step=conv_step,
    border_mode=border_mode,
    name='conv_{}_1'.format(i)))
  conv_layers2.append(conv_activation[i])
  conv_layers2.append(MaxPooling(pooling_size, name='pool_{}_1'.format(i)))


# ---------------------------------------------------------------
# Building both sequences and merge them by tensor.concatenate
# ---------------------------------------------------------------

conv_sequence = ConvolutionalSequence(conv_layers, num_channels, image_size=image_size,weights_init=Uniform(width=0.2), biases_init=Constant(0.), name='conv_sequence_0')
conv_sequence2 = ConvolutionalSequence(conv_layers2, num_channels, image_size=image_size,weights_init=Uniform(width=0.2), biases_init=Constant(0.), name='conv_sequence_1')

conv_sequence.initialize()
conv_out1 = Flattener(name='flattener_0').apply(conv_sequence.apply(x))
conv_out2 = Flattener(name='flattener_1').apply(conv_sequence2.apply(x2))
conv_out = tensor.concatenate([conv_out1,conv_out2],axis=1)

top_mlp_dims = [2*numpy.prod(conv_sequence.get_dim('output'))] + mlp_hiddens + [output_size]
top_mlp = MLP(mlp_activation, top_mlp_dims,weights_init=GlorotInitialization(),biases_init=Constant(0.))
top_mlp.initialize()

predict = top_mlp.apply(conv_out)

# ---------------------------------------------------------------
# Building computational graph
# ---------------------------------------------------------------

cost = CategoricalCrossEntropy().apply(y.flatten(), predict).copy(name='cost')
error = MisclassificationRate().apply(y.flatten(), predict)
Code Example #28
File: main_cnn.py Project: DjAntaki/IFT6266H16
def build_and_run(label, config):
    ############## CREATE THE NETWORK ###############
    #Define the parameters
    (num_epochs, num_batches, num_channels, image_shape, filter_size,
     num_filter, pooling_sizes, mlp_hiddens, output_size, batch_size,
     activation, mlp_activation) = (
        config['num_epochs'], config['num_batches'], config['num_channels'],
        config['image_shape'], config['filter_size'], config['num_filter'],
        config['pooling_sizes'], config['mlp_hiddens'], config['output_size'],
        config['batch_size'], config['activation'], config['mlp_activation'])
#    print(num_epochs, num_channels, image_shape, filter_size, num_filter, pooling_sizes, mlp_hiddens, output_size, batch_size, activation, mlp_activation)
    lambda_l1 = 0.000025
    lambda_l2 = 0.000025

    print("Building model")
    #Create the symbolics variable
    x = T.tensor4('image_features')
    y = T.lmatrix('targets')

    #Get the parameters
    conv_parameters = zip(filter_size, num_filter)

    #Create the convolutional layers
    conv_layers = list(interleave([(Convolutional(
                                      filter_size=filter_size,
                                      num_filters=num_filter,
                                      name='conv_{}'.format(i))
                    for i, (filter_size, num_filter)
                    in enumerate(conv_parameters)),
                  (activation),
            (MaxPooling(size, name='pool_{}'.format(i)) for i, size in enumerate(pooling_sizes))]))
        #    (AveragePooling(size, name='pool_{}'.format(i)) for i, size in enumerate(pooling_sizes))]))
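    # interleave (e.g. from toolz) alternates the iterables, yielding
    # conv_0, activation_0, pool_0, conv_1, activation_1, pool_1, ...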

    #Create the sequence
    conv_sequence = ConvolutionalSequence(conv_layers, num_channels, image_size=image_shape, weights_init=Uniform(width=0.2), biases_init=Constant(0.))
    #Initialize the convnet
    conv_sequence.initialize()
    #Add the MLP
    top_mlp_dims = [np.prod(conv_sequence.get_dim('output'))] + mlp_hiddens + [output_size]
    out = Flattener().apply(conv_sequence.apply(x))
    mlp = MLP(mlp_activation, top_mlp_dims, weights_init=Uniform(0, 0.2),
              biases_init=Constant(0.))
    #Initialize the MLP
    mlp.initialize()
    #Get the output
    predict = mlp.apply(out)

    cost = CategoricalCrossEntropy().apply(y.flatten(), predict).copy(name='cost')
    error = MisclassificationRate().apply(y.flatten(), predict)

    #Little trick to plot the error rate in two different plots (the same variable cannot be used twice in the plots, for an unknown reason)
    error_rate = error.copy(name='error_rate')
    error_rate2 = error.copy(name='error_rate2')

    ########### REGULARIZATION ##################
    cg = ComputationGraph([cost])
    weights = VariableFilter(roles=[WEIGHT])(cg.variables)
    biases = VariableFilter(roles=[BIAS])(cg.variables)
  # # l2_penalty_weights = T.sum([i*lambda_l2/len(weights) * (W ** 2).sum() for i,W in enumerate(weights)]) # Gradually increase penalty for layer
    l2_penalty = T.sum([lambda_l2 * (W ** 2).sum() for i,W in enumerate(weights+biases)]) # Gradually increase penalty for layer
  # # #l2_penalty_bias = T.sum([lambda_l2*(B **2).sum() for B in biases])
  # # #l2_penalty = l2_penalty_weights + l2_penalty_bias
    l2_penalty.name = 'l2_penalty'
    l1_penalty = T.sum([lambda_l1*T.abs_(z).sum() for z in weights+biases])
  #  l1_penalty_weights = T.sum([i*lambda_l1/len(weights) * T.abs_(W).sum() for i,W in enumerate(weights)]) # Gradually increase penalty for layer    
  #  l1_penalty_biases = T.sum([lambda_l1 * T.abs_(B).sum() for B in biases])
  #  l1_penalty = l1_penalty_biases + l1_penalty_weights
    l1_penalty.name = 'l1_penalty'
    costreg = cost + l2_penalty + l1_penalty
    costreg.name = 'costreg'
    
    ########### DEFINE THE ALGORITHM #############
  #  algorithm = GradientDescent(cost=cost, parameters=cg.parameters, step_rule=Momentum())
    algorithm = GradientDescent(cost=costreg, parameters=cg.parameters, step_rule=Adam())

    ########### GET THE DATA #####################
    istest = 'test' in config.keys()
    train_stream, valid_stream, test_stream = get_stream(batch_size,image_shape,test=istest)
    

    ########### INITIALIZING EXTENSIONS ##########
    checkpoint = Checkpoint('models/best_'+label+'.tar')
    checkpoint.add_condition(['after_epoch'],
                         predicate=OnLogRecord('valid_error_rate_best_so_far'))
    #Adding a live plot with the bokeh server
    plot = Plot(label,
        channels=[['train_error_rate', 'valid_error_rate'],
                  ['valid_cost', 'valid_error_rate2'],
                 # ['train_costreg','train_grad_norm']], #  
                 ['train_costreg','train_total_gradient_norm','train_l2_penalty','train_l1_penalty']],
                  server_url="http://hades.calculquebec.ca:5042")  
   
    grad_norm = aggregation.mean(algorithm.total_gradient_norm)
    grad_norm.name = 'grad_norm'

    extensions = [Timing(),
                  FinishAfter(after_n_epochs=num_epochs,
                  after_n_batches=num_batches),
                  DataStreamMonitoring([cost, error_rate, error_rate2], valid_stream, prefix="valid"),
                  TrainingDataMonitoring([costreg, error_rate, error_rate2,
                    grad_norm,l2_penalty,l1_penalty],
                     prefix="train", after_epoch=True),
                  plot,
                  ProgressBar(),
                  Printing(),
                  TrackTheBest('valid_error_rate',min), #Keep best
                  checkpoint,  #Save best
                  FinishIfNoImprovementAfter('valid_error_rate_best_so_far', epochs=4)] # Early-stopping                  
    model = Model(cost)
    main_loop = MainLoop(algorithm,data_stream=train_stream,model=model,extensions=extensions)
    main_loop.run()
Code Example #29
for i, (filter_size,num_filter,pooling_size) in enumerate(conv_parameters):
  conv_layers.append(SpatialBatchNormalization(name='sbn_{}'.format(i)))
  conv_layers.append(
  Convolutional(
    filter_size=filter_size,
    num_filters=num_filter,
    step=conv_step,
    border_mode=border_mode,
    name='conv_{}'.format(i)))
  conv_layers.append(conv_activation[i])
  conv_layers.append(MaxPooling(pooling_size, name='pool_{}'.format(i)))

conv_sequence = ConvolutionalSequence(conv_layers, num_channels, image_size=image_size,weights_init=Uniform(width=0.2), biases_init=Constant(0.))
conv_sequence.initialize()
out = Flattener().apply(conv_sequence.apply(x))

top_mlp_dims = [numpy.prod(conv_sequence.get_dim('output'))] + mlp_hiddens + [output_size]
top_mlp = MLP(mlp_activation, top_mlp_dims,weights_init=GlorotInitialization(),biases_init=Constant(0.))
top_mlp.initialize()

predict = top_mlp.apply(out)

cost = CategoricalCrossEntropy().apply(y.flatten(), predict).copy(name='cost')
error = MisclassificationRate().apply(y.flatten(), predict)
error_rate = error.copy(name='error_rate')
error_rate2 = error.copy(name='error_rate2')
cg = ComputationGraph([cost, error_rate])
inputs = VariableFilter(roles=[INPUT])(cg.variables)
linear_inputs_index = [-10,-8,6]
linear_inputs = list(itemgetter(*linear_inputs_index)(inputs))
Code Example #30
def run_experiment():

    np.random.seed(42)

    X = tensor.tensor4('features')
    nbr_channels = 3
    image_shape = (5, 5)

    conv_layers = [
        ConvolutionalLayer(
            filter_size=(2, 2),
            num_filters=10,
            activation=Rectifier().apply,
            border_mode='valid',
            pooling_size=(1, 1),
            weights_init=Uniform(width=0.1),
            #biases_init=Uniform(width=0.01),
            biases_init=Constant(0.0),
            name='conv0')
    ]
    conv_sequence = ConvolutionalSequence(conv_layers,
                                          num_channels=nbr_channels,
                                          image_size=image_shape)
    #conv_sequence.push_allocation_config()
    conv_sequence.initialize()

    flattener = Flattener()
    conv_output = conv_sequence.apply(X)
    y_hat = flattener.apply(conv_output)
    # Whatever. Not important since we're not going to actually train anything.
    cost = tensor.sqr(y_hat).sum()

    #L_grads_method_02 = [tensor.grad(cost, v) for v in VariableFilter(roles=[FILTER, BIAS])(ComputationGraph([y_hat]).variables)]
    L_grads_method_02 = [
        tensor.grad(cost, v) for v in VariableFilter(
            roles=[BIAS])(ComputationGraph([y_hat]).variables)
    ]
    # works on the sum of the gradients in a mini-batch
    sum_square_norm_gradients_method_02 = sum(
        [tensor.sqr(g).sum() for g in L_grads_method_02])

    D_by_layer = get_conv_layers_transformation_roles(
        ComputationGraph(conv_output))
    individual_sum_square_norm_gradients_method_00 = get_sum_square_norm_gradients_conv_transformations(
        D_by_layer, cost)

    # why does this thing depend on N again ?
    # I don't think I've used a cost that divides by N.

    N = 2
    Xtrain = np.random.randn(N, nbr_channels, image_shape[0],
                             image_shape[1]).astype(np.float32)
    #Xtrain[1:,:,:,:] = 0.0
    Xtrain[:, :, :, :] = 1.0

    convolution_filter_variable = VariableFilter(roles=[FILTER])(
        ComputationGraph([y_hat]).variables)[0]
    convolution_filter_variable_value = convolution_filter_variable.get_value()
    convolution_filter_variable_value[:, :, :, :] = 1.0
    #convolution_filter_variable_value[0,0,:,:] = 1.0
    convolution_filter_variable.set_value(convolution_filter_variable_value)

    f = theano.function([X], [
        cost, individual_sum_square_norm_gradients_method_00,
        sum_square_norm_gradients_method_02
    ])

    [c, v0, gs2] = f(Xtrain)

    #print "[c, v0, gs2]"
    L_c, L_v0, L_gs2 = ([], [], [])
    for n in range(N):
        [nc, nv0, ngs2] = f(Xtrain[n, :, :, :].reshape(
            (1, Xtrain.shape[1], Xtrain.shape[2], Xtrain.shape[3])))
        L_c.append(nc)
        L_v0.append(nv0)
        L_gs2.append(ngs2)

    print "Cost for whole mini-batch in single shot : %f." % c
    print "Cost for whole mini-batch accumulated    : %f." % sum(L_c)
    print ""
    print "Square-norm of all gradients for each data point in single shot :"
    print v0.reshape((1, -1))
    print "Square-norm of all gradients for each data point iteratively :"
    print np.array(L_gs2).reshape((1, -1))
    print ""
    print "Difference max abs : %f." % np.max(np.abs(v0 - np.array(L_gs2)))
    print ""
    print "Ratios : "
    print np.array(L_gs2).reshape((1, -1)) / v0.reshape((1, -1))
Code Example #31
def create_network(inputs=None, batch=batch_size):
    if inputs is None:
        inputs = T.tensor4('features')
    x = T.cast(inputs, 'float32')
    x = x / 255. if dataset != 'binarized_mnist' else x

    # PixelCNN architecture
    conv_list = [
        ConvolutionalNoFlip(*first_layer, mask='A', name='0'),
        Rectifier()
    ]
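    # PixelCNN masking convention: mask 'A' (first layer) excludes the current
    # pixel; mask 'B' (subsequent layers) may use it.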
    for i in range(n_layer):
        conv_list.extend([
            ConvolutionalNoFlip(*second_layer, mask='B', name=str(i + 1)),
            Rectifier()
        ])

    conv_list.extend([
        ConvolutionalNoFlip((1, 1),
                            h * n_channel,
                            mask='B',
                            name=str(n_layer + 1)),
        Rectifier()
    ])
    conv_list.extend([
        ConvolutionalNoFlip((1, 1),
                            h * n_channel,
                            mask='B',
                            name=str(n_layer + 2)),
        Rectifier()
    ])
    conv_list.extend(
        [ConvolutionalNoFlip(*third_layer, mask='B', name=str(n_layer + 3))])

    sequence = ConvolutionalSequence(conv_list,
                                     num_channels=n_channel,
                                     batch_size=batch,
                                     image_size=(img_dim, img_dim),
                                     border_mode='half',
                                     weights_init=IsotropicGaussian(std=0.05,
                                                                    mean=0),
                                     biases_init=Constant(0.02),
                                     tied_biases=False)
    sequence.initialize()
    x = sequence.apply(x)
    if MODE == '256ary':
        x = x.reshape(
            (-1, 256, n_channel, img_dim, img_dim)).dimshuffle(0, 2, 3, 4, 1)
        x = x.reshape((-1, 256))
        x_hat = Softmax().apply(x)
        inp = T.cast(inputs, 'int64').flatten()
        cost = CategoricalCrossEntropy().apply(inp, x_hat) * img_dim * img_dim
        cost_bits_dim = categorical_crossentropy(log_softmax(x), inp)
    else:
        x_hat = Logistic().apply(x)
        cost = BinaryCrossEntropy().apply(inputs, x_hat) * img_dim * img_dim
        #cost = T.nnet.binary_crossentropy(x_hat, inputs)
        #cost = cost.sum() / inputs.shape[0]
        cost_bits_dim = -(inputs * T.log2(x_hat) +
                          (1.0 - inputs) * T.log2(1.0 - x_hat)).mean()

    cost_bits_dim.name = "nnl_bits_dim"
    cost.name = 'loglikelihood_nat'
    return cost, cost_bits_dim
Code Example #32
File: convnet-2D3D.py Project: policecar/dann
# pooling_size, num_channels, conv_step=(1, 1), pooling_step=None, batch_size=None,
# image_size=None, border_mode='valid', tied_biases=False, **kwargs)
conv_layers = [
    ConvolutionalLayer(Rectifier().apply, filter_size_1, num_filters_1,
                       pooling_size_1, name='conv_1'),
    ConvolutionalLayer(Rectifier().apply, filter_size_2, num_filters_2,
                       pooling_size_2, name='conv_2')]

convnet = ConvolutionalSequence(conv_layers,
                                num_channels=num_channels,
                                image_size=(num_rows, num_cols),
                                weights_init=IsotropicGaussian(0.1),
                                biases_init=Constant(0))

# Apply ( aka instantiate this part of the computational graph )
features = Flattener().apply(convnet.apply(x))
# features = Feedforward((convnet.apply(x)))
# features = convnet.apply(x)

for i, l in enumerate(convnet.layers):
    print("Layer {0}: {1} inputs and {2} outputs.".format(i, l.get_dim('input_'), l.get_dim('output')))

# Initialize ( aka fill the theano variables representing parameters with values )
print("Initializing the convnet..")
convnet.initialize()

# Get the dimensionality of the last layer
conv_out_dim = np.prod(convnet.layers[-1].get_dim('output'))
print("Output dimensionality of the ConvNet: {0}".format(conv_out_dim))

# Define fully connected layers
Code Example #33
def run_experiment():

    np.random.seed(42)

    #X = tensor.matrix('features')
    X = tensor.tensor4('features')
    y = tensor.matrix('targets')
    nbr_channels = 3
    image_shape = (30, 30)

    conv_layers = [ ConvolutionalLayer( filter_size=(4,4),
                                        num_filters=10,
                                        activation=Rectifier().apply,
                                        border_mode='full',
                                        pooling_size=(1,1),
                                        weights_init=Uniform(width=0.1),
                                        biases_init=Constant(0.0),
                                        name='conv0'),
                    ConvolutionalLayer( filter_size=(3,3),
                                        num_filters=14,
                                        activation=Rectifier().apply,
                                        border_mode='full',
                                        pooling_size=(1,1),
                                        weights_init=Uniform(width=0.1),
                                        biases_init=Constant(0.0),
                                        name='conv1')]
    conv_sequence = ConvolutionalSequence(  conv_layers,
                                            num_channels=nbr_channels,
                                            image_size=image_shape)
    #conv_sequence.push_allocation_config()
    conv_sequence.initialize()
    conv_output_dim = np.prod(conv_sequence.get_dim('output'))
    #conv_output_dim = 25*25

    flattener = Flattener()

    mlp = MLP(  activations=[Rectifier(), Rectifier(), Softmax()],
                dims=[conv_output_dim, 50, 50, 10],
                weights_init=IsotropicGaussian(std=0.1), biases_init=IsotropicGaussian(std=0.01))
    mlp.initialize()

    conv_output = conv_sequence.apply(X)
    y_hat = mlp.apply(flattener.apply(conv_output))

    cost = CategoricalCrossEntropy().apply(y, y_hat)
    #cost = CategoricalCrossEntropy().apply(y_hat, y)
    #cost = BinaryCrossEntropy().apply(y.flatten(), y_hat.flatten())

    cg = ComputationGraph([y_hat])
    
    """
    print "--- INPUT ---"
    for v in VariableFilter(bricks=mlp.linear_transformations, roles=[INPUT])(cg.variables):
        print v.tag.annotations[0].name

    print "--- OUTPUT ---"
    #print(VariableFilter(bricks=mlp.linear_transformations, roles=[OUTPUT])(cg.variables))
    for v in VariableFilter(bricks=mlp.linear_transformations, roles=[OUTPUT])(cg.variables):
        print v.tag.annotations[0].name

    print "--- WEIGHT ---"
    #print(VariableFilter(bricks=mlp.linear_transformations, roles=[WEIGHT])(cg.variables))
    for v in VariableFilter(bricks=mlp.linear_transformations, roles=[WEIGHT])(cg.variables):
        print v.tag.annotations[0].name
    print "--- BIAS ---"
    #print(VariableFilter(bricks=mlp.linear_transformations, roles=[BIAS])(cg.variables))
    for v in VariableFilter(bricks=mlp.linear_transformations, roles=[BIAS])(cg.variables):
        print v.tag.annotations[0].name
    """

    # check out .tag on the variables to see which layer they belong to

    print "----------------------------"


    D_by_layer = get_linear_transformation_roles(mlp, cg)

    # returns a vector with one entry for each in the mini-batch
    individual_sum_square_norm_gradients_method_00 = get_sum_square_norm_gradients_linear_transformations(D_by_layer, cost)

    #import pprint
    #pp = pprint.PrettyPrinter(indent=4)
    #pp.pprint(get_conv_layers_transformation_roles(ComputationGraph(conv_output)).items())

    D_by_layer = get_conv_layers_transformation_roles(ComputationGraph(conv_output))
    individual_sum_square_norm_gradients_method_00 += get_sum_square_norm_gradients_conv_transformations(D_by_layer, cost)



    print "There are %d entries in cg.parameters." % len(cg.parameters)
    L_grads_method_01 = [tensor.grad(cost, p) for p in cg.parameters]
    L_grads_method_02 = [tensor.grad(cost, v) for v in VariableFilter(roles=[WEIGHT, BIAS])(cg.variables)]

    # works on the sum of the gradients in a mini-batch
    sum_square_norm_gradients_method_01 = sum([tensor.sqr(g).sum() for g in L_grads_method_01])
    sum_square_norm_gradients_method_02 = sum([tensor.sqr(g).sum() for g in L_grads_method_02])

    N = 8
    Xtrain = np.random.randn(N, nbr_channels, image_shape[0], image_shape[1]).astype(np.float32)

    # Option 1.
    ytrain = np.zeros((N, 10), dtype=np.float32)
    for n in range(N):
        label = np.random.randint(low=0, high=10)
        ytrain[n, label] = 1.0

    # Option 2, just to debug situations with NaN.
    #ytrain = np.random.rand(N, 10).astype(np.float32)
    #for n in range(N):
    #    ytrain[n,:] = ytrain[n,:] / ytrain[n,:].sum()


    f = theano.function([X,y],
                        [cost,
                            individual_sum_square_norm_gradients_method_00,
                            sum_square_norm_gradients_method_01,
                            sum_square_norm_gradients_method_02])

    [c, v0, gs1, gs2] = f(Xtrain, ytrain)

    #print "[c, v0, gs1, gs2]"
    L_c, L_v0, L_gs1, L_gs2 = ([], [], [], [])
    for n in range(N):
        [nc, nv0, ngs1, ngs2] = f(Xtrain[n,:].reshape((1,Xtrain.shape[1],Xtrain.shape[2], Xtrain.shape[3])), ytrain[n,:].reshape((1,10)))
        L_c.append(nc)
        L_v0.append(nv0)
        L_gs1.append(ngs1)
        L_gs2.append(ngs2)

    print "Cost for whole mini-batch in single shot : %f." % c
    print "Cost for whole mini-batch accumulated    : %f." % sum(L_c)
    print ""
    print "Square-norm of all gradients for each data point in single shot :"
    print v0.reshape((1,-1))
    print "Square-norm of all gradients for each data point iteratively :"
    print np.array(L_gs1).reshape((1,-1))
    print "Square-norm of all gradients for each data point iteratively :"
    print np.array(L_gs2).reshape((1,-1))
    print ""
    print "Difference max abs : %f." % np.max(np.abs(v0 - np.array(L_gs1)))
    print "Difference max abs : %f." % np.max(np.abs(v0 - np.array(L_gs2)))
    print ""
    print "Ratios : "
    print np.array(L_gs1).reshape((1,-1)) / v0.reshape((1,-1))
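
The per-example quantities above exploit the fact that, for a linear map y = xW, the gradient of the cost with respect to W contributed by example n is the outer product of x_n and d(cost)/dy_n, so its squared norm factorizes as ||x_n||^2 * ||d(cost)/dy_n||^2. A minimal hedged sketch of that trick in plain Theano (made-up shapes; not the actual helper implementation):

import numpy as np
import theano
import theano.tensor as tensor

x = tensor.matrix('x')                                  # (batch, d_in)
W = theano.shared(np.random.randn(3, 4).astype(theano.config.floatX))
y = tensor.dot(x, W)                                    # (batch, d_out)
cost = tensor.sqr(y).sum()
d_y = tensor.grad(cost, y)            # gradient wrt the layer output
# squared gradient norm wrt W for every example, without N separate backprops
per_example = (x ** 2).sum(axis=1) * (d_y ** 2).sum(axis=1)
f = theano.function([x], per_example)
print(f(np.random.randn(5, 3).astype(theano.config.floatX)))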
Code example #34
def Fire(image_shape, num_input, conv1, conv2, conv3, out, i):
    layers11 = []
    layers12 = []
    layers13 = []
    layers14 = []

    ############# SQUEEZE ###########
    ### 4 Conv 1x1 ###
    layers11.append(Convolutional(filter_size=(1,1), num_channels=num_input, num_filters=conv1, image_size=image_shape, border_mode='half', name='conv_{}'.format(i)))
    layers11.append(BatchNormalization(name='batch_{}'.format(i)))
    layers11.append(Rectifier())
    conv_sequence11 = ConvolutionalSequence(layers11, num_channels=num_input, image_size=image_shape, weights_init=Orthogonal(), use_bias=False, name='convSeq_{}'.format(i))
    conv_sequence11.initialize()
    out11 = conv_sequence11.apply(out)
    i = i + 1

    layers12.append(Convolutional(filter_size=(1,1), num_channels=num_input, num_filters=conv1, image_size=image_shape, border_mode='half', name='conv_{}'.format(i)))
    layers12.append(BatchNormalization(name='batch_{}'.format(i)))
    layers12.append(Rectifier())
    conv_sequence12 = ConvolutionalSequence(layers12, num_channels=num_input, image_size=image_shape, weights_init=Orthogonal(), use_bias=False, name='convSeq_{}'.format(i))
    conv_sequence12.initialize()
    out12 = conv_sequence12.apply(out)
    i = i + 1

    layers13.append(Convolutional(filter_size=(1,1), num_channels=num_input, num_filters=conv1, image_size=image_shape, border_mode='half', name='conv_{}'.format(i)))
    layers13.append(BatchNormalization(name='batch_{}'.format(i)))
    layers13.append(Rectifier())
    conv_sequence13 = ConvolutionalSequence(layers13, num_channels=num_input, image_size=image_shape, weights_init=Orthogonal(), use_bias=False, name='convSeq_{}'.format(i))
    conv_sequence13.initialize()
    out13 = conv_sequence13.apply(out)
    i = i + 1

    layers14.append(Convolutional(filter_size=(1,1), num_channels=num_input, num_filters=conv1, image_size=image_shape, border_mode='half', name='conv_{}'.format(i)))
    layers14.append(BatchNormalization(name='batch_{}'.format(i)))
    layers14.append(Rectifier())
    conv_sequence14 = ConvolutionalSequence(layers14, num_channels=num_input, image_size=image_shape, weights_init=Orthogonal(), use_bias=False, name='convSeq_{}'.format(i))
    conv_sequence14.initialize()
    out14 = conv_sequence14.apply(out)
    i = i + 1

    squeezed = T.concatenate([out11, out12, out13, out14], axis=1)
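    # `squeezed` now has conv1 * 4 channels (four parallel 1x1 squeeze
    # branches), matching num_input2 below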

    ####### EXPAND #####
    layers21 = []
    layers22 = []
    layers23 = []
    layers24 = []
    layers31 = []
    layers32 = []
    layers33 = []
    layers34 = []
    num_input2 = conv1 * 4
    ### 4 conv 1x1 ###
    layers21.append(Convolutional(filter_size=(1,1), num_channels=num_input2, num_filters=conv2, image_size=image_shape, border_mode='half', name='conv_{}'.format(i)))
    layers21.append(BatchNormalization(name='batch_{}'.format(i)))
    layers21.append(Rectifier())
    conv_sequence21 = ConvolutionalSequence(layers21, num_channels=num_input2, image_size=image_shape, weights_init=Orthogonal(), use_bias=False, name='convSeq_{}'.format(i))
    conv_sequence21.initialize()
    out21 = conv_sequence21.apply(squeezed)
    i = i + 1

    layers22.append(Convolutional(filter_size=(1,1), num_channels=num_input2, num_filters=conv2, image_size=image_shape, border_mode='half', name='conv_{}'.format(i)))
    layers22.append(BatchNormalization(name='batch_{}'.format(i)))
    layers22.append(Rectifier())
    conv_sequence22 = ConvolutionalSequence(layers22, num_channels=num_input2, image_size=image_shape, weights_init=Orthogonal(), use_bias=False, name='convSeq_{}'.format(i))
    conv_sequence22.initialize()
    out22 = conv_sequence22.apply(squeezed)
    i = i + 1

    layers23.append(Convolutional(filter_size=(1,1), num_channels=num_input2, num_filters=conv2, image_size=image_shape, border_mode='half', name='conv_{}'.format(i)))
    layers23.append(BatchNormalization(name='batch_{}'.format(i)))
    layers23.append(Rectifier())
    conv_sequence23 = ConvolutionalSequence(layers23, num_channels=num_input2, image_size=image_shape, weights_init=Orthogonal(), use_bias=False, name='convSeq_{}'.format(i))
    conv_sequence23.initialize()
    out23 = conv_sequence23.apply(squeezed)
    i = i + 1

    layers24.append(Convolutional(filter_size=(1,1), num_channels=num_input2, num_filters=conv2, image_size=image_shape, border_mode='half', name='conv_{}'.format(i)))
    layers24.append(BatchNormalization(name='batch_{}'.format(i)))
    layers24.append(Rectifier())
    conv_sequence24 = ConvolutionalSequence(layers24, num_channels=num_input2, image_size=image_shape, weights_init=Orthogonal(), use_bias=False, name='convSeq_{}'.format(i))
    conv_sequence24.initialize()
    out24 = conv_sequence24.apply(squeezed)
    i = i + 1

    ### 4 conv 3x3 ###
    layers31.append(Convolutional(filter_size=(3,3), num_channels=num_input2, num_filters=conv3, image_size=image_shape, border_mode='half', name='conv_{}'.format(i)))
    layers31.append(BatchNormalization(name='batch_{}'.format(i)))
    layers31.append(Rectifier())
    conv_sequence31 = ConvolutionalSequence(layers31, num_channels=num_input2, image_size=image_shape, weights_init=Orthogonal(), use_bias=False, name='convSeq_{}'.format(i))
    conv_sequence31.initialize()
    out31 = conv_sequence31.apply(squeezed)
    i = i + 1

    layers32.append(Convolutional(filter_size=(3,3), num_channels=num_input2, num_filters=conv3, image_size=image_shape, border_mode='half', name='conv_{}'.format(i)))
    layers32.append(BatchNormalization(name='batch_{}'.format(i)))
    layers32.append(Rectifier())
    conv_sequence32 = ConvolutionalSequence(layers32, num_channels=num_input2, image_size=image_shape, weights_init=Orthogonal(), use_bias=False, name='convSeq_{}'.format(i))
    conv_sequence32.initialize()
    out32 = conv_sequence32.apply(squeezed)
    i = i + 1

    layers33.append(Convolutional(filter_size=(3,3), num_channels=num_input2, num_filters=conv3, image_size=image_shape, border_mode='half', name='conv_{}'.format(i)))
    layers33.append(BatchNormalization(name='batch_{}'.format(i)))
    layers33.append(Rectifier())
    conv_sequence33 = ConvolutionalSequence(layers33, num_channels=num_input2, image_size=image_shape, weights_init=Orthogonal(), use_bias=False, name='convSeq_{}'.format(i))
    conv_sequence33.initialize()
    out33 = conv_sequence33.apply(squeezed)
    i = i + 1

    layers34.append(Convolutional(filter_size=(3,3), num_channels=num_input2, num_filters=conv3, image_size=image_shape, border_mode='half', name='conv_{}'.format(i)))
    layers34.append(BatchNormalization(name='batch_{}'.format(i)))
    layers34.append(Rectifier())
    conv_sequence34 = ConvolutionalSequence(layers34, num_channels=num_input2, image_size=image_shape, weights_init=Orthogonal(), use_bias=False, name='convSeq_{}'.format(i))
    conv_sequence34.initialize()
    out34 = conv_sequence34.apply(squeezed)
    i = i + 1

    #Merge
    return T.concatenate([out21, out22, out23, out24, out31, out32, out33, out34], axis=1)
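
A hedged usage sketch of the Fire block above (all sizes made up): the squeeze stage produces conv1 * 4 channels, and the expand stage returns conv2 * 4 + conv3 * 4 channels.

# hypothetical call; `x` is a (batch, 64, 32, 32) Theano tensor
out = Fire(image_shape=(32, 32), num_input=64, conv1=4, conv2=8, conv3=8,
           out=x, i=0)
# `out` now has 4*8 + 4*8 = 64 channels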
def test_convolutional_layer():
	batch_size=2
	x = T.tensor4()
	y = T.ivector()
	V = 200
	layer_conv = Convolutional(filter_size=(5,5),num_filters=V,
				name="toto",
				weights_init=IsotropicGaussian(0.01),
				biases_init=Constant(0.0))
	# try with no bias
	activation = Rectifier()
	pool = MaxPooling(pooling_size=(2,2))

	convnet = ConvolutionalSequence([layer_conv, activation, pool], num_channels=15,
					image_size=(10,10),
					name="conv_section")
	convnet.push_allocation_config()
	convnet.initialize()
	output=convnet.apply(x)
	batch_size=output.shape[0]
	output_dim=np.prod(convnet.get_dim('output'))
	result_conv = output.reshape((batch_size, output_dim))
	mlp=MLP(activations=[Rectifier().apply], dims=[output_dim, 10],
				weights_init=IsotropicGaussian(0.01),
				biases_init=Constant(0.0))
	mlp.initialize()
	output=mlp.apply(result_conv)
	cost = T.mean(Softmax().categorical_cross_entropy(y.flatten(), output))
	cg = ComputationGraph(cost)
	W = VariableFilter(roles=[WEIGHT])(cg.variables)
	B = VariableFilter(roles=[BIAS])(cg.variables)
	W = W[-1]; b = B[-1]
	
	print(W.shape.eval())
	print(b.shape.eval())
	import pdb
	pdb.set_trace()
	inputs_conv = VariableFilter(roles=[INPUT], bricks=[Convolutional])(cg)
	outputs_conv = VariableFilter(roles=[OUTPUT], bricks=[Convolutional])(cg)
	var_input=inputs_conv[0]
	var_output=outputs_conv[0]
	
	[d_W,d_S,d_b] = T.grad(cost, [W, var_output, b])

	import pdb
	pdb.set_trace()
	w_shape = W.shape.eval()
	d_W = d_W.reshape((w_shape[0], w_shape[1]*w_shape[2]*w_shape[3]))

	d_b = T.zeros((w_shape[0],6*6))
	#d_b = d_b.reshape((w_shape[0], 8*8))
	d_p = T.concatenate([d_W, d_b], axis=1)
	d_S = d_S.dimshuffle((1, 0, 2, 3)).reshape((w_shape[0], batch_size, 6*6)).reshape((w_shape[0], batch_size*6*6))
	#d_S = d_S.reshape((2,200, 64))
	#x_value=1e3*np.random.ranf((1,15,10,10))
	x_value = 1e3*np.random.ranf((2,15, 10, 10))
	f = theano.function([x,y], [var_input, d_S, d_W], allow_input_downcast=True, on_unused_input='ignore')
	A, B, C= f(x_value, [5, 5])
	print(np.mean(B))
	return
	
	E_A = expansion_op(A, (2, 15, 10, 10), (5,5))
	print(E_A.shape)
	E_A = E_A.reshape((2*36, C.shape[1]))
	print(E_A.shape)
	tmp = C - np.dot(B, E_A)
	print(lin.norm(tmp, 'fro'))
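	# note: the unreachable block above sketches a check that the filter
	# gradient equals the output gradient times an im2col-style expansion of
	# the input (d_W ~ d_S . E_A); `expansion_op` is assumed to be defined
	# elsewhere in the project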
Code example #36
    ConvolutionalLayer(Rectifier().apply, (3, 3), 32, (2, 2), name='l2')
]

convnet = ConvolutionalSequence(conv_layers,
                                num_channels=1,
                                image_size=(28, 28),
                                weights_init=IsotropicGaussian(0.1),
                                biases_init=Constant(0))

convnet.initialize()

output_dim = np.prod(convnet.get_dim('output'))
print(output_dim)

# Fully connected layers
features = Flattener().apply(convnet.apply(x))

mlp = MLP(activations=[Rectifier(), None],
          dims=[output_dim, 100, 10],
          weights_init=IsotropicGaussian(0.01),
          biases_init=Constant(0))
mlp.initialize()

y_hat = mlp.apply(features)

# numerically stable softmax
cost = Softmax().categorical_cross_entropy(y.flatten(), y_hat)
cost.name = 'nll'
error_rate = MisclassificationRate().apply(y.flatten(), y_hat)
#cost = MisclassificationRate().apply(y, y_hat)
#cost.name = 'error_rate'
Code example #37
File: build_model.py Project: gyom/voltmeleon
def build_submodel(input_shape,
                   output_dim,
                   L_dim_conv_layers,
                   L_filter_size,
                   L_pool_size,
                   L_activation_conv,
                   L_dim_full_layers,
                   L_activation_full,
                   L_exo_dropout_conv_layers,
                   L_exo_dropout_full_layers,
                   L_endo_dropout_conv_layers,
                   L_endo_dropout_full_layers,
                   L_border_mode=None,
                   L_filter_step=None,
                   L_pool_step=None):


    # TO DO : target size and name of the features

    x = T.tensor4('features')
    y = T.imatrix('targets')

    assert len(input_shape) == 3, "input_shape must have 3 entries : (channels, height, width)"

    num_channels = input_shape[0]
    image_size = tuple(input_shape[1:])
    print(image_size)
    print(num_channels)
    prediction = output_dim

    # CONVOLUTION
    output_conv = x
    output_dim = num_channels*np.prod(image_size)
    conv_layers = []
    assert len(L_dim_conv_layers) == len(L_filter_size)
    if L_filter_step is None:
        L_filter_step = [None] * len(L_dim_conv_layers)
    assert len(L_dim_conv_layers) == len(L_pool_size)
    if L_pool_step is None:
        L_pool_step = [None] * len(L_dim_conv_layers)
    assert len(L_dim_conv_layers) == len(L_pool_step)
    assert len(L_dim_conv_layers) == len(L_activation_conv)
    if L_border_mode is None:
        L_border_mode = ["valid"] * len(L_dim_conv_layers)
    assert len(L_dim_conv_layers) == len(L_border_mode)
    assert len(L_dim_conv_layers) == len(L_endo_dropout_conv_layers)
    assert len(L_dim_conv_layers) == len(L_exo_dropout_conv_layers)

    # regarding the batch dropout : the dropout is applied on the filter
    # which is equivalent to the output dimension
    # you have to look at the dropout_rate of the next layer
    # that is why we need to have the first dropout value of L_exo_dropout_full_layers
    
    # the first value has to be 0.0 in this context, and we'll
    # assume that it is, but let's have an assert
    assert L_exo_dropout_conv_layers[0] == 0.0, "L_exo_dropout_conv_layers[0] has to be 0.0 in this context. There are ways to make it work, of course, but we don't support that in this script."

    # here we shift L_exo_dropout_conv_layers by one layer
    L_exo_dropout_conv_layers = L_exo_dropout_conv_layers[1:] + [L_exo_dropout_full_layers[0]]
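    # e.g. (hypothetical rates) with L_exo_dropout_conv_layers = [0.0, 0.2, 0.5]
    # and L_exo_dropout_full_layers = [0.3, ...], the shifted list becomes
    # [0.2, 0.5, 0.3]: each conv layer sheds filters according to the dropout
    # rate applied to the input of the layer that consumes its output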

    if len(L_dim_conv_layers):
        for (num_filters, filter_size, filter_step,
            pool_size, pool_step, activation_str, border_mode,
            dropout, index) in zip(L_dim_conv_layers,
                                  L_filter_size,
                                  L_filter_step,
                                  L_pool_size,
                                  L_pool_step,
                                  L_activation_conv,
                                  L_border_mode,
                                  L_exo_dropout_conv_layers,
                                  range(len(L_dim_conv_layers))
                                  ):

            # convert filter_size and pool_size in tuple
            filter_size = tuple(filter_size)

            if filter_step is None:
                filter_step = (1, 1)
            else:
                filter_step = tuple(filter_step)

            if pool_size is None:
                pool_size = (0,0)
            else:
                pool_size = tuple(pool_size)

            # TO DO : leaky relu
            if activation_str.lower() == 'rectifier':
                activation = Rectifier().apply
            elif activation_str.lower() == 'tanh':
                activation = Tanh().apply
            elif activation_str.lower() in ['sigmoid', 'logistic']:
                activation = Logistic().apply
            elif activation_str.lower() in ['id', 'identity']:
                activation = Identity().apply
            else:
                raise Exception("unknown activation function : %s", activation_str)

            assert 0.0 <= dropout and dropout < 1.0
            num_filters = num_filters - int(num_filters*dropout)
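            # e.g. num_filters=64 with dropout=0.25 leaves 64 - 16 = 48 filters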

            print "border_mode : %s" % border_mode

            # filter_step
            # http://blocks.readthedocs.org/en/latest/api/bricks.html#module-blocks.bricks.conv

            kwargs = {}
            if filter_step is None or filter_step == (1,1):
                pass
            else:
                # there's a bit of a mix of names because `Convolutional` takes
                # a "step" argument, but `ConvolutionalActivation` takes a "conv_step" argument
                kwargs['conv_step'] = filter_step

            if (pool_size[0] == 0 and pool_size[1] == 0):
                layer_conv = ConvolutionalActivation(activation=activation,
                                                filter_size=filter_size,
                                                num_filters=num_filters,
                                                border_mode=border_mode,
                                                name="layer_%d" % index,
                                                **kwargs)
            else:
                if pool_step is None:
                    pass
                else:
                    kwargs['pooling_step'] = tuple(pool_step)

                layer_conv = ConvolutionalLayer(activation=activation,
                                                filter_size=filter_size,
                                                num_filters=num_filters,
                                                border_mode=border_mode,
                                                pooling_size=pool_size,
                                                name="layer_%d" % index,
                                                **kwargs)

            conv_layers.append(layer_conv)

        convnet = ConvolutionalSequence(conv_layers, num_channels=num_channels,
                                    image_size=image_size,
                                    weights_init=Uniform(width=0.1),
                                    biases_init=Constant(0.0),
                                    name="conv_section")
        convnet.push_allocation_config()
        convnet.initialize()
        output_dim = np.prod(convnet.get_dim('output'))
        output_conv = convnet.apply(output_conv)
        


    output_conv = Flattener().apply(output_conv)

    # FULLY CONNECTED
    output_mlp = output_conv
    full_layers = []
    assert len(L_dim_full_layers) == len(L_activation_full)
    assert len(L_dim_full_layers) + 1 == len(L_endo_dropout_full_layers)
    assert len(L_dim_full_layers) + 1 == len(L_exo_dropout_full_layers)

    # regarding the batch dropout : the dropout is applied on the filter
    # which is equivalent to the output dimension
    # you have to look at the dropout_rate of the next layer
    # that is why we throw away the first value of L_exo_dropout_full_layers
    L_exo_dropout_full_layers = L_exo_dropout_full_layers[1:]
    pre_dim = output_dim
    print "When constructing the model, the output_dim of the conv section is %d." % output_dim
    if len(L_dim_full_layers):
        for (dim, activation_str,
            dropout, index) in zip(L_dim_full_layers,
                                  L_activation_full,
                                  L_exo_dropout_full_layers,
                                  range(len(L_dim_conv_layers),
                                        len(L_dim_conv_layers)+ 
                                        len(L_dim_full_layers))
                                   ):
                                          
                # TO DO : leaky relu
                if activation_str.lower() == 'rectifier':
                    activation = Rectifier().apply
                elif activation_str.lower() == 'tanh':
                    activation = Tanh().apply
                elif activation_str.lower() in ['sigmoid', 'logistic']:
                    activation = Logistic().apply
                elif activation_str.lower() in ['id', 'identity']:
                    activation = Identity().apply
                else:
                    raise Exception("unknown activation function : %s", activation_str)

                assert 0.0 <= dropout and dropout < 1.0
                dim = dim - int(dim*dropout)
                print "When constructing the fully-connected section, we apply dropout %f to add an MLP going from pre_dim %d to dim %d." % (dropout, pre_dim, dim)

                layer_full = MLP(activations=[activation], dims=[pre_dim, dim],
                                 weights_init=Uniform(width=0.1),
                                 biases_init=Constant(0.0),
                                name="layer_%d" % index)
                layer_full.initialize()
                full_layers.append(layer_full)
                pre_dim = dim

        for layer in full_layers:
            output_mlp = layer.apply(output_mlp)

        output_dim = L_dim_full_layers[-1] - int(L_dim_full_layers[-1]*L_exo_dropout_full_layers[-1])

    # COST FUNCTION
    output_layer = Linear(output_dim, prediction,
                          weights_init=Uniform(width=0.1),
                          biases_init=Constant(0.0),
                          name="layer_"+str(len(L_dim_conv_layers)+ 
                                            len(L_dim_full_layers))
                          )
    output_layer.initialize()
    full_layers.append(output_layer)
    y_pred = output_layer.apply(output_mlp)
    # SOFTMAX and log likelihood
    y_hat = Softmax().apply(y_pred)
    # be careful : CategoricalCrossEntropy expects softmax probabilities, while
    # Softmax().categorical_cross_entropy expects the raw output of the network
    cost = CategoricalCrossEntropy().apply(y.flatten(), y_hat)
    #cost = Softmax().categorical_cross_entropy(y.flatten(), y_pred)
    cost.name = "cost"

    # Misclassification
    error_rate_brick = MisclassificationRate()
    error_rate = error_rate_brick.apply(y.flatten(), y_hat)
    error_rate.name = "error_rate"

    # put names

    D_params, D_kind = build_params(x, T.matrix(), conv_layers, full_layers)
    # test computation graph
    

    cg = ComputationGraph(cost)

    # DROPOUT
    L_endo_dropout = L_endo_dropout_conv_layers + L_endo_dropout_full_layers

    cg_dropout = cg

    for (index, drop_rate) in enumerate(L_endo_dropout):
        # re-filter on the *current* graph so that successive dropout
        # applications accumulate instead of each overwriting the previous one
        inputs = VariableFilter(roles=[INPUT])(cg_dropout.variables)
        for input_ in inputs:
            m = re.match(r"layer_(\d+)_apply.*", input_.name)
            if m and index == int(m.group(1)):
                if drop_rate < 0.0001:
                    print("Skipped applying dropout on %s because the dropout rate was under 0.0001." % input_.name)
                    break
                else:
                    cg_dropout = apply_dropout(cg_dropout, [input_], drop_rate)
                    print("Applied dropout %f on %s." % (drop_rate, input_.name))
                    break

    cg = cg_dropout

    return (cg, error_rate, cost, D_params, D_kind)
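
For orientation, a hedged usage sketch of build_submodel (all layer sizes and dropout rates below are made up; it assumes the project's helpers such as build_params are importable):

cg, error_rate, cost, D_params, D_kind = build_submodel(
    input_shape=(3, 32, 32), output_dim=10,
    L_dim_conv_layers=[32, 64], L_filter_size=[(3, 3), (3, 3)],
    L_pool_size=[(2, 2), (2, 2)], L_activation_conv=['rectifier', 'rectifier'],
    L_dim_full_layers=[256], L_activation_full=['rectifier'],
    L_exo_dropout_conv_layers=[0.0, 0.0], L_exo_dropout_full_layers=[0.0, 0.5],
    L_endo_dropout_conv_layers=[0.0, 0.0], L_endo_dropout_full_layers=[0.5, 0.5])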
Code example #38
File: main_cnn.py Project: DjAntaki/IFT6266H16
def build_and_run(label, config):
    ############## CREATE THE NETWORK ###############
    #Define the parameters
    num_epochs = config['num_epochs']
    num_batches = config['num_batches']
    num_channels = config['num_channels']
    image_shape = config['image_shape']
    filter_size = config['filter_size']
    num_filter = config['num_filter']
    pooling_sizes = config['pooling_sizes']
    mlp_hiddens = config['mlp_hiddens']
    output_size = config['output_size']
    batch_size = config['batch_size']
    activation = config['activation']
    mlp_activation = config['mlp_activation']
    #    print(num_epochs, num_channels, image_shape, filter_size, num_filter, pooling_sizes, mlp_hiddens, output_size, batch_size, activation, mlp_activation)
    lambda_l1 = 0.000025
    lambda_l2 = 0.000025

    print("Building model")
    #Create the symbolics variable
    x = T.tensor4('image_features')
    y = T.lmatrix('targets')

    #Get the parameters
    conv_parameters = zip(filter_size, num_filter)

    #Create the convolutions layers
    conv_layers = list(
        interleave([(Convolutional(filter_size=filter_size,
                                   num_filters=num_filter,
                                   name='conv_{}'.format(i))
                     for i, (filter_size,
                             num_filter) in enumerate(conv_parameters)),
                    (activation),
                    (MaxPooling(size, name='pool_{}'.format(i))
                     for i, size in enumerate(pooling_sizes))]))
    #    (AveragePooling(size, name='pool_{}'.format(i)) for i, size in enumerate(pooling_sizes))]))

    #Create the sequence
    conv_sequence = ConvolutionalSequence(conv_layers,
                                          num_channels,
                                          image_size=image_shape,
                                          weights_init=Uniform(width=0.2),
                                          biases_init=Constant(0.))
    #Initialize the convnet
    conv_sequence.initialize()
    #Add the MLP
    top_mlp_dims = [np.prod(conv_sequence.get_dim('output'))
                    ] + mlp_hiddens + [output_size]
    out = Flattener().apply(conv_sequence.apply(x))
    mlp = MLP(mlp_activation,
              top_mlp_dims,
              weights_init=Uniform(0, 0.2),
              biases_init=Constant(0.))
    #Initialize the MLP
    mlp.initialize()
    #Get the output
    predict = mlp.apply(out)

    cost = CategoricalCrossEntropy().apply(y.flatten(),
                                           predict).copy(name='cost')
    error = MisclassificationRate().apply(y.flatten(), predict)

    #Little trick to plot the error rate in two different plots (we can't use the same data twice in the plot, for an unknown reason)
    error_rate = error.copy(name='error_rate')
    error_rate2 = error.copy(name='error_rate2')

    ########### REGULARIZATION ##################
    cg = ComputationGraph([cost])
    weights = VariableFilter(roles=[WEIGHT])(cg.variables)
    biases = VariableFilter(roles=[BIAS])(cg.variables)
    # # l2_penalty_weights = T.sum([i*lambda_l2/len(weights) * (W ** 2).sum() for i,W in enumerate(weights)]) # Gradually increase penalty for layer
    l2_penalty = T.sum([lambda_l2 * (W ** 2).sum()
                        for W in weights + biases])  # uniform L2 penalty on all weights and biases
    # # #l2_penalty_bias = T.sum([lambda_l2*(B **2).sum() for B in biases])
    # # #l2_penalty = l2_penalty_weights + l2_penalty_bias
    l2_penalty.name = 'l2_penalty'
    l1_penalty = T.sum([lambda_l1 * T.abs_(z).sum() for z in weights + biases])
    #  l1_penalty_weights = T.sum([i*lambda_l1/len(weights) * T.abs_(W).sum() for i,W in enumerate(weights)]) # Gradually increase penalty for layer
    #  l1_penalty_biases = T.sum([lambda_l1 * T.abs_(B).sum() for B in biases])
    #  l1_penalty = l1_penalty_biases + l1_penalty_weights
    l1_penalty.name = 'l1_penalty'
    costreg = cost + l2_penalty + l1_penalty
    costreg.name = 'costreg'

    ########### DEFINE THE ALGORITHM #############
    #  algorithm = GradientDescent(cost=cost, parameters=cg.parameters, step_rule=Momentum())
    algorithm = GradientDescent(cost=costreg,
                                parameters=cg.parameters,
                                step_rule=Adam())

    ########### GET THE DATA #####################
    istest = 'test' in config.keys()
    train_stream, valid_stream, test_stream = get_stream(batch_size,
                                                         image_shape,
                                                         test=istest)

    ########### INITIALIZING EXTENSIONS ##########
    checkpoint = Checkpoint('models/best_' + label + '.tar')
    checkpoint.add_condition(
        ['after_epoch'], predicate=OnLogRecord('valid_error_rate_best_so_far'))
    #Adding a live plot with the bokeh server
    plot = Plot(
        label,
        channels=[
            ['train_error_rate', 'valid_error_rate'],
            ['valid_cost', 'valid_error_rate2'],
            # ['train_costreg','train_grad_norm']], #
            [
                'train_costreg', 'train_total_gradient_norm',
                'train_l2_penalty', 'train_l1_penalty'
            ]
        ],
        server_url="http://hades.calculquebec.ca:5042")

    grad_norm = aggregation.mean(algorithm.total_gradient_norm)
    grad_norm.name = 'total_gradient_norm'  # so the 'train_total_gradient_norm' plot channel exists

    extensions = [
        Timing(),
        FinishAfter(after_n_epochs=num_epochs, after_n_batches=num_batches),
        DataStreamMonitoring([cost, error_rate, error_rate2],
                             valid_stream,
                             prefix="valid"),
        TrainingDataMonitoring([
            costreg, error_rate, error_rate2, grad_norm, l2_penalty, l1_penalty
        ],
                               prefix="train",
                               after_epoch=True),
        plot,
        ProgressBar(),
        Printing(),
        TrackTheBest('valid_error_rate', min),  #Keep best
        checkpoint,  #Save best
        FinishIfNoImprovementAfter('valid_error_rate_best_so_far', epochs=4)
    ]  # Early-stopping
    model = Model(cost)
    main_loop = MainLoop(algorithm,
                         data_stream=train_stream,
                         model=model,
                         extensions=extensions)
    main_loop.run()
Code example #39
File: blocks_rnn_ex.py Project: davebs/deep_q_rl
    def __init__(self, rnn_dims, num_actions, data_X_np=None, data_y_np=None, width=32, height=32):
        ###############################################################
        #
        #       Network and data setup
        #
        ##############################################################
        RNN_DIMS = rnn_dims  # use the constructor argument so the state shapes below stay consistent
        NUM_ACTIONS = num_actions

        tensor5 = T.TensorType('float32', [False, True, True, True, True])
        self.x = T.tensor4('features')
        self.reward = T.tensor3('targets', dtype='float32')
        self.state = T.matrix('states', dtype='float32')

        self.hidden_states = [] # holds hidden states in np array form

        
        #data_X & data_Y supplied in init function now...

        if data_X_np is None or data_y_np is None:
            print('you did not supply data at init')
            data_X_np = np.float32(np.random.normal(size=(1280, 1,1, width, height)))
            data_y_np = np.float32(np.random.normal(size=(1280, 1,1,1)))
        #data_states_np = np.float32(np.ones((1280, 1, 100)))
        state_shape = (data_X_np.shape[0],rnn_dims)
        self.data_states_np = np.float32(np.zeros(state_shape))


        self.datastream = IterableDataset(dict(features=data_X_np,
                                            targets=data_y_np,
                                            states=self.data_states_np)).get_example_stream()
        self.datastream_test = IterableDataset(dict(features=data_X_np,
                                            targets=data_y_np,
                                            states=self.data_states_np)).get_example_stream()
        data_X = self.datastream


        # 2 conv inputs
        # we want to take our sequence of input images and convert them to convolutional
        # representations
        conv_layers = [ConvolutionalLayer(Rectifier().apply, (3, 3), 16, (2, 2), name='l1'),
                       ConvolutionalLayer(Rectifier().apply, (3, 3), 32, (2, 2), name='l2'),
                       ConvolutionalLayer(Rectifier().apply, (3, 3), 64, (2, 2), name='l3'),
                       ConvolutionalLayer(Rectifier().apply, (3, 3), 128, (2, 2), name='l4'),
                       ConvolutionalLayer(Rectifier().apply, (3, 3), 128, (2, 2), name='l5'),
                       ConvolutionalLayer(Rectifier().apply, (3, 3), 128, (2, 2), name='l6')]
        convnet = ConvolutionalSequence(conv_layers, num_channels=4,
                                        image_size=(width, height),
                                        weights_init=init.Uniform(0, 0.01),
                                        biases_init=init.Constant(0.0),
                                        tied_biases=False,
                                        border_mode='full')
        convnet.initialize()
        output_dim = np.prod(convnet.get_dim('output'))

        conv_out = convnet.apply(self.x)

        reshape_dims = (conv_out.shape[0], conv_out.shape[1]*conv_out.shape[2]*conv_out.shape[3])
        hidden_repr = conv_out.reshape(reshape_dims)
        conv2rnn = Linear(input_dim=output_dim, output_dim=RNN_DIMS, 
                            weights_init=init.Uniform(width=0.01),
                            biases_init=init.Constant(0.))
        conv2rnn.initialize()
        conv2rnn_output = conv2rnn.apply(hidden_repr)

        # RNN hidden layer
        # then we want to feed those conv representations into an RNN
        rnn = SimpleRecurrent(dim=RNN_DIMS, activation=Rectifier(), weights_init=init.Uniform(width=0.01))
        rnn.initialize()
        self.learned_state = rnn.apply(inputs=conv2rnn_output, states=self.state, iterate=False)


        # linear output from hidden layer
        # the RNN has two outputs, but only this one has a target. That is, this is "expected return"
        # which the network attempts to minimize difference between expected return and actual return
        lin_output = Linear(input_dim=RNN_DIMS, output_dim=1, 
                            weights_init=init.Uniform(width=0.01),
                            biases_init=init.Constant(0.))
        lin_output.initialize()
        self.exp_reward = lin_output.apply(self.learned_state)
        self.get_exp_reward = theano.function([self.x, self.state], self.exp_reward)

        # softmax output from hidden layer
        # this provides a softmax of action recommendations
        # the hypothesis is that adjusting the other outputs magically influences this set of outputs
        # to suggest smarter (or more realistic?) moves
        action_output = Linear(input_dim=RNN_DIMS, output_dim=NUM_ACTIONS, 
                            weights_init=init.Constant(.001), 
                            biases_init=init.Constant(0.))
        action_output.initialize()

        self.suggested_actions = Softmax().apply(action_output.apply(self.learned_state[-1]))

        ######################
        # use this to get suggested actions... it requires the state of the hidden units from the previous
        # timestep
        #####################
        self.get_suggested_actions = theano.function([self.x, self.state], [self.suggested_actions, self.learned_state])
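
        # Hedged usage sketch (names assumed): step the agent one frame at a
        # time, feeding back the hidden state returned by the previous call:
        #   state = np.zeros((1, rnn_dims), dtype='float32')
        #   probs, state = agent.get_suggested_actions(frame, state)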
Code example #40
File: test2.py Project: jpilaul/IFT6266_project
    b.ConvolutionalLayer(activation,
                         filter_size,
                         num_filters_,
                         pooling_size,
                         num_channels=3) for filter_size, num_filters_,
    pooling_size in zip(filter_sizes, num_filters, pooling_sizes)
]

convnet = ConvolutionalSequence(conv_layers,
                                num_channels=3,
                                image_size=(32, 32),
                                weights_init=Uniform(0, 0.2),
                                biases_init=Constant(0.))

convnet.initialize()
conv_features = Flattener().apply(convnet.apply(X))

# MLP

mlp = MLP(activations=[Logistic(name='sigmoid_0'),
                       Softmax(name='softmax_1')],
          dims=[256, 256, 2],
          weights_init=IsotropicGaussian(0.01),
          biases_init=Constant(0))
# two activations require three dims (one linear layer feeds each activation):
# [child.name for child in mlp.children]
# -> ['linear_0', 'sigmoid_0', 'linear_1', 'softmax_1']
Y = mlp.apply(conv_features)
mlp.initialize()

# Setting up the cost function
from blocks.bricks.cost import CategoricalCrossEntropy
Code example #41
File: convnet.py Project: bordesf/IFT6266
conv_layers = list(interleave([(ConvolutionalActivation(
                                  filter_size=filter_size,
                                  num_filters=num_filter,
                                  activation=activation,
                                  name='conv_{}'.format(i))
                for i, (activation, filter_size, num_filter)
                in enumerate(conv_parameters)),
        (MaxPooling(size, name='pool_{}'.format(i)) for i, size in enumerate(pooling_sizes))]))

#Create the sequence
conv_sequence = ConvolutionalSequence(conv_layers, num_channels, image_size=image_shape, weights_init=Uniform(width=0.2), biases_init=Constant(0.))
#Initialize the convnet
conv_sequence.initialize()
#Add the MLP
top_mlp_dims = [np.prod(conv_sequence.get_dim('output'))] + mlp_hiddens + [output_size]
out = Flattener().apply(conv_sequence.apply(x))
mlp = MLP(mlp_activation, top_mlp_dims, weights_init=Uniform(0, 0.2),
          biases_init=Constant(0.))
#Initialize the MLP
mlp.initialize()
#Get the output
predict = mlp.apply(out)

cost = CategoricalCrossEntropy().apply(y.flatten(), predict).copy(name='cost')
error = MisclassificationRate().apply(y.flatten(), predict)
#Little trick to plot the error rate in two different plots (we can't use the same data twice in the plot, for an unknown reason)
error_rate = error.copy(name='error_rate')
error_rate2 = error.copy(name='error_rate2')
cg = ComputationGraph([cost, error_rate])

########### GET THE DATA #####################
Code example #42
                  num_filters=num_filter[j + 1],
                  step=conv_step,
                  border_mode=border_mode,
                  name='conv_{}'.format(i)))
conv_layers1.append(BatchNormalization(name='BNconv_{}'.format(i)))
conv_layers1.append(conv_activation[0])
conv_layers1.append(MaxPooling(pooling_size[j + 1], name='pool_{}'.format(i)))

conv_sequence = ConvolutionalSequence(conv_layers1,
                                      num_channels=num_channels,
                                      image_size=image_size,
                                      weights_init=Uniform(width=0.2),
                                      biases_init=Constant(0.),
                                      name='ConvSeq_{}'.format(i))
conv_sequence.initialize()
out = conv_sequence.apply(x)
#out = Flattener().apply(conv_sequence.apply(x))

################# Convolutional Sequence 2A and 2B #################
# conv_layers2 parameters
i = i + 1  #Sequence
j = 0  #Sub Layer
# 1st and 2nd are sequential; 3rd and 4th are sequential; 2 sequences are concatenated
filter_size = [(1, 1), (5, 5), (1, 1), (3, 3)]
num_filter = [32, 64, 64, 96]
num_channels = 3
pooling_size = None
conv_step = (1, 1)
border_mode = 'half'

out = intranet(i, j, out, image_size, filter_size, num_filter, num_channels,
Code example #43
i = i + 1 #Sequence
conv_layers1.append(
    Convolutional(
        filter_size=filter_size[j+3],
        num_filters=num_filter[j+3],
        step=conv_step,
        border_mode=border_mode,
        name='conv_{}'.format(i)))
conv_layers1.append(BatchNormalization(name='BNconv_{}'.format(i)))
conv_layers1.append(conv_activation[0])
conv_layers1.append(MaxPooling(pooling_size[j+2], name='pool_{}'.format(i)))

conv_sequence1 = ConvolutionalSequence(conv_layers1, num_channels=num_channels,
                                       image_size=image_size,
                                       weights_init=Uniform(width=0.2),
                                       biases_init=Constant(0.),
                                       name='ConvSeq1_{}'.format(i))
conv_sequence1.initialize()
out1 = conv_sequence1.apply(x)


################# Convolutional Sequence 2 #################
# conv_layers2 parameters
i = i+1 #Sequence
j = 0 #Sub Layer
filter_size = [(7,7), (5,5), (2,2), (5,5)]
num_filter = [16, 32, 48, 64]
num_channels = 3
pooling_size = [(3,3), (2,2), (2,2)]
conv_step = (1,1)
border_mode = 'valid'

conv_layers2 = []
conv_layers2.append(SpatialBatchNormalization(name='spatialBN_{}'.format(i)))
Code example #45
                  name='conv_2'))
layers.append(BatchNormalization(name='batch_2'))
layers.append(Rectifier())
layers.append(MaxPooling((3, 3), step=(2, 2), padding=(1, 1), name='pool_2'))

#Create the sequence
conv_sequence = ConvolutionalSequence(layers,
                                      num_channels=3,
                                      image_size=(160, 160),
                                      weights_init=Orthogonal(),
                                      use_bias=False,
                                      name='convSeq')
#Initialize the convnet
conv_sequence.initialize()
#Output the first result
out = conv_sequence.apply(x)

###############SECOND STAGE#####################
out2 = inception((20, 20), 192, 64, 96, 128, 16, 32, 32, out, 10)
out3 = inception((20, 20), 256, 128, 128, 192, 32, 96, 64, out2, 20)
out31 = MaxPooling((2, 2), name='poolLow').apply(out3)

out4 = inception((10, 10), 480, 192, 96, 208, 16, 48, 64, out31, 30)
out5 = inception((10, 10), 512, 160, 112, 224, 24, 64, 64, out4, 40)
out6 = inception((10, 10), 512, 128, 128, 256, 24, 64, 64, out5, 50)
out7 = inception((10, 10), 512, 112, 144, 288, 32, 64, 64, out6, 60)
out8 = inception((10, 10), 528, 256, 160, 320, 32, 128, 128, out7, 70)
out81 = MaxPooling((2, 2), name='poolLow1').apply(out8)

out9 = inception((5, 5), 832, 256, 160, 320, 32, 128, 128, out81, 80)
out10 = inception((5, 5), 832, 384, 192, 384, 48, 128, 128, out9, 90)
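
# Note: inception(...) is assumed to be defined elsewhere in the project, with
# GoogLeNet-style arguments: inception(image_size, num_input_channels, n_1x1,
# n_3x3_reduce, n_3x3, n_5x5_reduce, n_5x5, n_pool_proj, input, index). The
# first call above matches the GoogLeNet "inception (3a)" row
# (64, 96, 128, 16, 32, 32) on a 192-channel input.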
Code example #46
File: deepDream.py Project: bordesf/IFT6266
layers.append(Rectifier())
layers.append(MaxPooling((3, 3), step=(2, 2), padding=(1, 1), name="pool_0"))

layers.append(Convolutional(filter_size=(1, 1), num_filters=64, border_mode="half", name="conv_1"))
layers.append(Rectifier())
layers.append(Convolutional(filter_size=(3, 3), num_filters=192, border_mode="half", name="conv_2"))
layers.append(Rectifier())
layers.append(MaxPooling((3, 3), step=(2, 2), padding=(1, 1), name="pool_2"))

# Create the sequence
conv_sequence = ConvolutionalSequence(
    layers, num_channels=3, image_size=(None, None), weights_init=Orthogonal(), use_bias=False, name="convSeq"
)
# Initialize the convnet
conv_sequence.initialize()
# Output the first result
out = conv_sequence.apply(x)

###############SECOND STAGE#####################
out2 = inception((None, None), 192, 64, 96, 128, 16, 32, 32, out, 10)
out3 = inception((None, None), 256, 128, 128, 192, 32, 96, 64, out2, 20)
out31 = MaxPooling((2, 2), name="poolLow").apply(out3)
out4 = inception((None, None), 480, 192, 96, 208, 16, 48, 64, out31, 30)
out5 = inception((None, None), 512, 160, 112, 224, 24, 64, 64, out4, 40)

out6 = inception((None, None), 512, 128, 128, 256, 24, 64, 64, out5, 50)
out7 = inception((None, None), 512, 112, 144, 288, 32, 64, 64, out6, 60)
out8 = inception((None, None), 528, 256, 160, 320, 32, 128, 128, out7, 70)
out81 = MaxPooling((20, 20), name="poolLow1").apply(out8)
out9 = inception((None, None), 832, 256, 160, 320, 32, 128, 128, out81, 80)
out10 = inception((None, None), 832, 384, 192, 384, 48, 128, 128, out9, 90)
out91 = AveragePooling((5, 5), name="poolLow2").apply(out10)
Code example #47
def intranet(i, j, out, image_size, filter_size, num_filter, num_channels,
             pooling_size, conv_step, border_mode, conv_activation):

    conv_layersA = []  #first intra convolutional sequence

    conv_layersA.append(
        Convolutional(filter_size=filter_size[j],
                      num_filters=num_filter[j],
                      step=conv_step,
                      border_mode=border_mode,
                      name='conv_A{}({})'.format(i, j)))
    conv_layersA.append(BatchNormalization(name='BNconv_A{}({})'.format(i, j)))
    conv_layersA.append(conv_activation[0])

    j = j + 1  #next sub layer
    conv_layersA.append(
        Convolutional(filter_size=filter_size[j],
                      num_filters=num_filter[j],
                      step=conv_step,
                      border_mode=border_mode,
                      name='conv_A{}({})'.format(i, j)))
    conv_layersA.append(BatchNormalization(name='BNconv_A{}({})'.format(i, j)))
    conv_layersA.append(conv_activation[0])

    conv_sequenceA = ConvolutionalSequence(conv_layersA,
                                           num_channels=num_channels,
                                           image_size=image_size,
                                           weights_init=Uniform(width=0.2),
                                           use_bias=False,
                                           name='convSeq_A{}'.format(i))
    conv_sequenceA.initialize()
    out1 = conv_sequenceA.apply(out)

    conv_layersB = []  #second intra convolutional sequence

    j = j + 1  #next sub layer
    conv_layersB.append(
        Convolutional(filter_size=filter_size[j],
                      num_filters=num_filter[j],
                      step=conv_step,
                      border_mode=border_mode,
                      name='conv_B{}({})'.format(i, j)))
    conv_layersB.append(BatchNormalization(name='BNconv_B{}({})'.format(i, j)))
    conv_layersB.append(conv_activation[0])

    j = j + 1  #next sub layer
    conv_layersB.append(
        Convolutional(filter_size=filter_size[j],
                      num_filters=num_filter[j],
                      step=conv_step,
                      border_mode=border_mode,
                      name='conv_B{}({})'.format(i, j)))
    conv_layersB.append(BatchNormalization(name='BNconv_B{}({})'.format(i, j)))
    conv_layersB.append(conv_activation[0])

    conv_sequenceB = ConvolutionalSequence(conv_layersB,
                                           num_channels=num_channels,
                                           image_size=image_size,
                                           weights_init=Uniform(width=0.2),
                                           use_bias=False,
                                           name='convSeq_B{}'.format(i))
    conv_sequenceB.initialize()
    out2 = conv_sequenceB.apply(out)

    #Merge
    return tensor.concatenate([out1, out2], axis=1)
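
# A hedged usage sketch for intranet, mirroring the truncated call earlier in
# this example (all values assumed): the two branches are applied to the same
# input and concatenated, giving num_filter[1] + num_filter[3] output channels.
#
#   out = intranet(i=2, j=0, out=out, image_size=(20, 20),
#                  filter_size=[(1, 1), (5, 5), (1, 1), (3, 3)],
#                  num_filter=[32, 64, 64, 96], num_channels=3,
#                  pooling_size=None, conv_step=(1, 1), border_mode='half',
#                  conv_activation=[Rectifier()])   # 64 + 96 = 160 channels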
Code example #48
def create_network(inputs=None, batch=batch_size):
    if inputs is None:
        inputs = T.tensor4('features')
    x = T.cast(inputs,'float32')
    x = x / 255. if dataset != 'binarized_mnist' else x

    # GatedPixelCNN
    gated = GatedPixelCNN(
        name='gated_layer_0',
        filter_size=7,
        image_size=(img_dim,img_dim),
        num_filters=h*n_channel,
        num_channels=n_channel,
        batch_size=batch,
        weights_init=IsotropicGaussian(std=0.02, mean=0),
        biases_init=Constant(0.02),
        res=False
    )
    gated.initialize()
    x_v, x_h = gated.apply(x, x)

    for i in range(n_layer):
        gated = GatedPixelCNN(
            name='gated_layer_{}'.format(i+1),
            filter_size=3,
            image_size=(img_dim,img_dim),
            num_channels=h*n_channel,
            batch_size=batch,
            weights_init=IsotropicGaussian(std=0.02, mean=0),
            biases_init=Constant(0.02),
            res=True
        )
        gated.initialize()
        x_v, x_h = gated.apply(x_v, x_h)

    conv_list = []
    conv_list.extend([Rectifier(), ConvolutionalNoFlip((1,1), h*n_channel, mask_type='B', name='1x1_conv_1')])
    #conv_list.extend([Rectifier(), ConvolutionalNoFlip((1,1), h*n_channel, mask='B', name='1x1_conv_2')])
    conv_list.extend([Rectifier(), ConvolutionalNoFlip(*third_layer, mask_type='B', name='output_layer')])

    sequence = ConvolutionalSequence(
        conv_list,
        num_channels=h*n_channel,
        batch_size=batch,
        image_size=(img_dim,img_dim),
        border_mode='half',
        weights_init=IsotropicGaussian(std=0.02, mean=0),
        biases_init=Constant(0.02),
        tied_biases=False
    )
    sequence.initialize()
    x = sequence.apply(x_h)
    if MODE == '256ary':
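        # move the 256 intensity bins to the last axis so that every
        # (image, channel, row, col) position becomes one row of an
        # (N*C*H*W, 256) matrix fed to the Softmax below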
        x = x.reshape((-1, 256, n_channel, img_dim, img_dim)).dimshuffle(0,2,3,4,1)
        x = x.reshape((-1, 256))
        x_hat = Softmax().apply(x)
        inp = T.cast(inputs, 'int64').flatten()
        cost = CategoricalCrossEntropy().apply(inp, x_hat) * img_dim * img_dim
        cost_bits_dim = categorical_crossentropy(log_softmax(x), inp)
    else:
        x_hat = Logistic().apply(x)
        cost = BinaryCrossEntropy().apply(inputs, x_hat) * img_dim * img_dim
        #cost = T.nnet.binary_crossentropy(x_hat, inputs)
        #cost = cost.sum() / inputs.shape[0]
        cost_bits_dim = -(inputs * T.log2(x_hat) + (1.0 - inputs) * T.log2(1.0 - x_hat)).mean()

    cost_bits_dim.name = "nll_bits_dim"
    cost.name = 'loglikelihood_nat'
    return cost, cost_bits_dim
Code example #49
File: convnet.py Project: stjordanis/IFT6266-2
                         num_filter) in enumerate(conv_parameters)),
                (MaxPooling(size, name='pool_{}'.format(i))
                 for i, size in enumerate(pooling_sizes))]))

#Create the sequence
conv_sequence = ConvolutionalSequence(conv_layers,
                                      num_channels,
                                      image_size=image_shape,
                                      weights_init=Uniform(width=0.2),
                                      biases_init=Constant(0.))
#Initialize the convnet
conv_sequence.initialize()
#Add the MLP
top_mlp_dims = [np.prod(conv_sequence.get_dim('output'))
                ] + mlp_hiddens + [output_size]
out = Flattener().apply(conv_sequence.apply(x))
mlp = MLP(mlp_activation,
          top_mlp_dims,
          weights_init=Uniform(0, 0.2),
          biases_init=Constant(0.))
#Initialize the MLP
mlp.initialize()
#Get the output
predict = mlp.apply(out)

cost = CategoricalCrossEntropy().apply(y.flatten(), predict).copy(name='cost')
error = MisclassificationRate().apply(y.flatten(), predict)
#Little trick to plot the error rate in two different plots (we can't use the same data twice in the plot, for an unknown reason)
error_rate = error.copy(name='error_rate')
error_rate2 = error.copy(name='error_rate2')
cg = ComputationGraph([cost, error_rate])
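
From here the graph could be compiled directly for monitoring; a minimal sketch, reusing the `x` and `y` variables the snippet assumes:

import theano

# Evaluate cost and error rate on a batch of images and integer targets.
evaluate = theano.function([x, y], [cost, error_rate])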
Code example #50
def create_network(inputs=None, batch=batch_size):
    if inputs is None:
        inputs = T.tensor4('features')
    x = T.cast(inputs, 'float32')
    x = x / 255. if dataset != 'binarized_mnist' else x

    # GatedPixelCNN
    gated = GatedPixelCNN(name='gated_layer_0',
                          filter_size=7,
                          image_size=(img_dim, img_dim),
                          num_filters=h * n_channel,
                          num_channels=n_channel,
                          batch_size=batch,
                          weights_init=IsotropicGaussian(std=0.02, mean=0),
                          biases_init=Constant(0.02),
                          res=False)
    gated.initialize()
    x_v, x_h = gated.apply(x, x)

    for i in range(n_layer):
        gated = GatedPixelCNN(name='gated_layer_{}'.format(i + 1),
                              filter_size=3,
                              image_size=(img_dim, img_dim),
                              num_channels=h * n_channel,
                              batch_size=batch,
                              weights_init=IsotropicGaussian(std=0.02, mean=0),
                              biases_init=Constant(0.02),
                              res=True)
        gated.initialize()
        x_v, x_h = gated.apply(x_v, x_h)

    conv_list = []
    conv_list.extend([
        Rectifier(),
        ConvolutionalNoFlip((1, 1),
                            h * n_channel,
                            mask_type='B',
                            name='1x1_conv_1')
    ])
    #conv_list.extend([Rectifier(), ConvolutionalNoFlip((1,1), h*n_channel, mask='B', name='1x1_conv_2')])
    conv_list.extend([
        Rectifier(),
        ConvolutionalNoFlip(*third_layer, mask_type='B', name='output_layer')
    ])

    sequence = ConvolutionalSequence(conv_list,
                                     num_channels=h * n_channel,
                                     batch_size=batch,
                                     image_size=(img_dim, img_dim),
                                     border_mode='half',
                                     weights_init=IsotropicGaussian(std=0.02,
                                                                    mean=0),
                                     biases_init=Constant(0.02),
                                     tied_biases=False)
    sequence.initialize()
    x = sequence.apply(x_h)
    if MODE == '256ary':
        x = x.reshape(
            (-1, 256, n_channel, img_dim, img_dim)).dimshuffle(0, 2, 3, 4, 1)
        x = x.reshape((-1, 256))
        x_hat = Softmax().apply(x)
        inp = T.cast(inputs, 'int64').flatten()
        cost = CategoricalCrossEntropy().apply(inp, x_hat) * img_dim * img_dim
        cost_bits_dim = categorical_crossentropy(log_softmax(x), inp)
    else:
        x_hat = Logistic().apply(x)
        cost = BinaryCrossEntropy().apply(inputs, x_hat) * img_dim * img_dim
        #cost = T.nnet.binary_crossentropy(x_hat, inputs)
        #cost = cost.sum() / inputs.shape[0]
        cost_bits_dim = -(inputs * T.log2(x_hat) +
                          (1.0 - inputs) * T.log2(1.0 - x_hat)).mean()

    cost_bits_dim.name = "nll_bits_dim"
    cost.name = 'loglikelihood_nat'
    return cost, cost_bits_dim
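
The two names returned above differ mainly in units: `loglikelihood_nat` is a negative log-likelihood in nats per image, while the bits-per-dimension figure divides by the number of dimensions and by ln 2. A minimal arithmetic sketch of that conversion (the numbers are made up for illustration):

import numpy as np

nll_nats_per_image = 230.0       # hypothetical value of `cost` on one image
n_dims = 28 * 28                 # img_dim * img_dim for MNIST-sized inputs
bits_per_dim = nll_nats_per_image / (n_dims * np.log(2.0))
print(bits_per_dim)              # ~0.42 for this made-up value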
Code example #51
def run_experiment():

    np.random.seed(42)

    X = tensor.tensor4('features')
    nbr_channels = 3
    image_shape = (5, 5)

    conv_layers = [ ConvolutionalLayer( filter_size=(2,2),
                                        num_filters=10,
                                        activation=Rectifier().apply,
                                        border_mode='valid',
                                        pooling_size=(1,1),
                                        weights_init=Uniform(width=0.1),
                                        #biases_init=Uniform(width=0.01),
                                        biases_init=Constant(0.0),
                                        name='conv0')]
    conv_sequence = ConvolutionalSequence(  conv_layers,
                                            num_channels=nbr_channels,
                                            image_size=image_shape)
    #conv_sequence.push_allocation_config()
    conv_sequence.initialize()
    
    flattener = Flattener()
    conv_output = conv_sequence.apply(X)
    y_hat = flattener.apply(conv_output)
    # Whatever. Not important since we're not going to actually train anything.
    cost = tensor.sqr(y_hat).sum()


    #L_grads_method_02 = [tensor.grad(cost, v) for v in VariableFilter(roles=[FILTER, BIAS])(ComputationGraph([y_hat]).variables)]
    L_grads_method_02 = [tensor.grad(cost, v) for v in VariableFilter(roles=[BIAS])(ComputationGraph([y_hat]).variables)]
    # works on the sum of the gradients in a mini-batch
    sum_square_norm_gradients_method_02 = sum([tensor.sqr(g).sum() for g in L_grads_method_02])


    D_by_layer = get_conv_layers_transformation_roles(ComputationGraph(conv_output))
    individual_sum_square_norm_gradients_method_00 = get_sum_square_norm_gradients_conv_transformations(D_by_layer, cost)


    # why does this thing depend on N again ?
    # I don't think I've used a cost that divides by N.

    N = 2
    Xtrain = np.random.randn(N, nbr_channels, image_shape[0], image_shape[1]).astype(np.float32)
    #Xtrain[1:,:,:,:] = 0.0
    Xtrain[:,:,:,:] = 1.0

    convolution_filter_variable = VariableFilter(roles=[FILTER])(ComputationGraph([y_hat]).variables)[0]
    convolution_filter_variable_value = convolution_filter_variable.get_value()
    convolution_filter_variable_value[:,:,:,:] = 1.0
    #convolution_filter_variable_value[0,0,:,:] = 1.0
    convolution_filter_variable.set_value(convolution_filter_variable_value)

    f = theano.function([X],
                        [cost,
                            individual_sum_square_norm_gradients_method_00,
                            sum_square_norm_gradients_method_02])


    [c, v0, gs2] = f(Xtrain)

    #print "[c, v0, gs2]"
    L_c, L_v0, L_gs2 = ([], [], [])
    for n in range(N):
        [nc, nv0, ngs2] = f(Xtrain[n,:, :, :].reshape((1, Xtrain.shape[1], Xtrain.shape[2], Xtrain.shape[3])))
        L_c.append(nc)
        L_v0.append(nv0)
        L_gs2.append(ngs2)

    print "Cost for whole mini-batch in single shot : %f." % c
    print "Cost for whole mini-batch accumulated    : %f." % sum(L_c)
    print ""
    print "Square-norm of all gradients for each data point in single shot :"
    print v0.reshape((1,-1))
    print "Square-norm of all gradients for each data point iteratively :"
    print np.array(L_gs2).reshape((1,-1))
    print ""
    print "Difference max abs : %f." % np.max(np.abs(v0 - np.array(L_gs2)))
    print ""
    print "Ratios : "
    print np.array(L_gs2).reshape((1,-1)) / v0.reshape((1,-1))
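
On the "why does this depend on N" comment in the snippet: for a cost that sums over examples, the mini-batch gradient is the sum of the per-example gradients, so its squared norm grows like N² when the examples are identical, while the sum of the individual squared norms grows like N; the two quantities therefore diverge with batch size even without any 1/N in the cost. A small numpy illustration with hypothetical per-example gradients:

import numpy as np

g = np.ones((2, 3))                  # N=2 identical per-example gradients
batch_grad = g.sum(axis=0)           # gradient of the summed cost
print((batch_grad ** 2).sum())       # 12.0 = N**2 * ||g_i||**2
print((g ** 2).sum(axis=1).sum())    # 6.0  = N  * ||g_i||**2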
Code example #52
#Head of this snippet was truncated; the opening of the call is reconstructed
#(the (3, 3) filter size is an assumption), the remaining keywords are as found
layers.append(Convolutional(filter_size=(3, 3),
                            num_filters=192,
                            border_mode='half',
                            name='conv_2'))
layers.append(Rectifier())
layers.append(MaxPooling((3, 3), step=(2, 2), padding=(1, 1), name='pool_2'))

#Create the sequence
conv_sequence = ConvolutionalSequence(layers,
                                      num_channels=3,
                                      image_size=(None, None),
                                      weights_init=Orthogonal(),
                                      use_bias=False,
                                      name='convSeq')
#Initialize the convnet
conv_sequence.initialize()
#Output the first result
out = conv_sequence.apply(x)

###############SECOND STAGE#####################
out2 = inception((None, None), 192, 64, 96, 128, 16, 32, 32, out, 10)
out3 = inception((None, None), 256, 128, 128, 192, 32, 96, 64, out2, 20)
out31 = MaxPooling((2, 2), name='poolLow').apply(out3)
out4 = inception((None, None), 480, 192, 96, 208, 16, 48, 64, out31, 30)
out5 = inception((None, None), 512, 160, 112, 224, 24, 64, 64, out4, 40)

out6 = inception((None, None), 512, 128, 128, 256, 24, 64, 64, out5, 50)
out7 = inception((None, None), 512, 112, 144, 288, 32, 64, 64, out6, 60)
out8 = inception((None, None), 528, 256, 160, 320, 32, 128, 128, out7, 70)
out81 = MaxPooling((20, 20), name='poolLow1').apply(out8)
out9 = inception((None, None), 832, 256, 160, 320, 32, 128, 128, out81, 80)
out10 = inception((None, None), 832, 384, 192, 384, 48, 128, 128, out9, 90)
out91 = AveragePooling((5, 5), name='poolLow2').apply(out10)