Example #1
def test_convolutional_sequence():
    x = tensor.tensor4('x')
    num_channels = 4
    pooling_size = 3
    batch_size = 5
    activation = Rectifier().apply

    conv = ConvolutionalLayer(activation, (3, 3),
                              5, (pooling_size, pooling_size),
                              weights_init=Constant(1.),
                              biases_init=Constant(5.))
    conv2 = ConvolutionalActivation(activation, (2, 2),
                                    4,
                                    weights_init=Constant(1.))

    seq = ConvolutionalSequence([conv, conv2],
                                num_channels,
                                image_size=(17, 13))
    seq.push_allocation_config()
    assert conv.num_channels == 4
    assert conv2.num_channels == 5
    conv2.convolution.use_bias = False
    y = seq.apply(x)
    seq.initialize()
    func = function([x], y)

    x_val = numpy.ones((batch_size, 4, 17, 13), dtype=theano.config.floatX)
    y_val = (numpy.ones((batch_size, 4, 4, 3)) * (9 * 4 + 5) * 4 * 5)
    assert_allclose(func(x_val), y_val)
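The expected value is easy to verify by hand: with all weights set to 1 and biases to 5, each unit of the first 3x3 convolution over 4 input channels computes 3 * 3 * 4 + 5 = 41; the rectifier and the max-pooling leave that value unchanged, and the bias-free 2x2 convolution over the resulting 5 channels gives 41 * 4 * 5 = 820, which is exactly the factor in y_val:

# Worked out in plain Python:
first_conv = 3 * 3 * 4 * 1.0 + 5        # 41 per unit, kept by ReLU and pooling
second_conv = first_conv * (2 * 2) * 5  # 820.0 == (9 * 4 + 5) * 4 * 5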
Example #3
def test_fully_layer():
    batch_size = 2
    x = T.tensor4()
    y = T.ivector()
    V = 200
    layer_conv = Convolutional(filter_size=(5, 5), num_filters=V,
                               name="toto",
                               weights_init=IsotropicGaussian(0.01),
                               biases_init=Constant(0.0))
    # try with no bias
    activation = Rectifier()
    pool = MaxPooling(pooling_size=(2, 2))

    convnet = ConvolutionalSequence([layer_conv, activation, pool],
                                    num_channels=15,
                                    image_size=(10, 10),
                                    name="conv_section")
    convnet.push_allocation_config()
    convnet.initialize()
    output = convnet.apply(x)
    batch_size = output.shape[0]
    output_dim = np.prod(convnet.get_dim('output'))
    result_conv = output.reshape((batch_size, output_dim))
    mlp = MLP(activations=[Rectifier().apply], dims=[output_dim, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0.0))
    mlp.initialize()
    output = mlp.apply(result_conv)
    cost = T.mean(Softmax().categorical_cross_entropy(y.flatten(), output))
    cg = ComputationGraph(cost)
    W = VariableFilter(roles=[WEIGHT])(cg.variables)
    B = VariableFilter(roles=[BIAS])(cg.variables)
    W = W[0]
    b = B[0]

    inputs_fully = VariableFilter(roles=[INPUT], bricks=[Linear])(cg)
    outputs_fully = VariableFilter(roles=[OUTPUT], bricks=[Linear])(cg)
    var_input = inputs_fully[0]
    var_output = outputs_fully[0]

    [d_W, d_S, d_b] = T.grad(cost, [W, var_output, b])

    d_b = d_b.dimshuffle(('x', 0))
    d_p = T.concatenate([d_W, d_b], axis=0)
    x_value = 1e3 * np.random.ranf((2, 15, 10, 10))
    f = theano.function([x, y], [var_input, d_S, d_p],
                        allow_input_downcast=True, on_unused_input='ignore')
    A, B, C = f(x_value, [5, 0])
    A = np.concatenate([A, np.ones((2, 1))], axis=1)
    print 'A', A.shape
    print 'B', B.shape
    print 'C', C.shape

    print lin.norm(C - np.dot(np.transpose(A), B), 'fro')

    return
Example #4
def test_border_mode_not_pushed():
    layers = [Convolutional(border_mode='full'),
              ConvolutionalActivation(Rectifier().apply),
              ConvolutionalActivation(Rectifier().apply, border_mode='valid'),
              ConvolutionalLayer(Rectifier().apply, border_mode='full')]
    stack = ConvolutionalSequence(layers)
    stack.push_allocation_config()
    assert stack.children[0].border_mode == 'full'
    assert stack.children[1].border_mode == 'valid'
    assert stack.children[2].border_mode == 'valid'
    assert stack.children[3].border_mode == 'full'
    stack2 = ConvolutionalSequence(layers, border_mode='full')
    stack2.push_allocation_config()
    assert stack2.children[0].border_mode == 'full'
    assert stack2.children[1].border_mode == 'full'
    assert stack2.children[2].border_mode == 'full'
    assert stack2.children[3].border_mode == 'full'
Example #5
def test_border_mode_not_pushed():
    layers = [
        Convolutional(border_mode='full'),
        ConvolutionalActivation(Rectifier().apply),
        ConvolutionalActivation(Rectifier().apply, border_mode='valid'),
        ConvolutionalActivation(Rectifier().apply, border_mode='full')
    ]
    stack = ConvolutionalSequence(layers)
    stack.push_allocation_config()
    assert stack.children[0].border_mode == 'full'
    assert stack.children[1].border_mode == 'valid'
    assert stack.children[2].border_mode == 'valid'
    assert stack.children[3].border_mode == 'full'
    stack2 = ConvolutionalSequence(layers, border_mode='full')
    stack2.push_allocation_config()
    assert stack2.children[0].border_mode == 'full'
    assert stack2.children[1].border_mode == 'full'
    assert stack2.children[2].border_mode == 'full'
    assert stack2.children[3].border_mode == 'full'
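Both variants exercise the same rule: each child keeps the border_mode it was constructed with (the brick default is 'valid'), and only an explicit border_mode on the sequence itself is pushed down onto every child. A minimal sketch of the rule, following the same pattern as the tests above:

stack = ConvolutionalSequence([Convolutional(border_mode='full'),
                               Convolutional()])
stack.push_allocation_config()
assert stack.children[0].border_mode == 'full'   # explicitly set, kept
assert stack.children[1].border_mode == 'valid'  # brick default

stack2 = ConvolutionalSequence(stack.layers, border_mode='full')
stack2.push_allocation_config()
assert all(child.border_mode == 'full' for child in stack2.children)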
Example #6
def create_model_brick():
    layers = [
        conv_brick(2, 1, 64),
        bn_brick(),
        LeakyRectifier(leak=LEAK),
        conv_brick(7, 2, 128),
        bn_brick(),
        LeakyRectifier(leak=LEAK),
        conv_brick(5, 2, 256),
        bn_brick(),
        LeakyRectifier(leak=LEAK),
        conv_brick(7, 2, 256),
        bn_brick(),
        LeakyRectifier(leak=LEAK),
        conv_brick(4, 1, 512),
        bn_brick(),
        LeakyRectifier(leak=LEAK),
        conv_brick(1, 1, 2 * NLAT)
    ]
    encoder_mapping = ConvolutionalSequence(layers=layers,
                                            num_channels=NUM_CHANNELS,
                                            image_size=IMAGE_SIZE,
                                            use_bias=False,
                                            name='encoder_mapping')
    encoder = GaussianConditional(encoder_mapping, name='encoder')

    layers = [
        conv_transpose_brick(4, 1, 512),
        bn_brick(),
        LeakyRectifier(leak=LEAK),
        conv_transpose_brick(7, 2, 256),
        bn_brick(),
        LeakyRectifier(leak=LEAK),
        conv_transpose_brick(5, 2, 256),
        bn_brick(),
        LeakyRectifier(leak=LEAK),
        conv_transpose_brick(7, 2, 128),
        bn_brick(),
        LeakyRectifier(leak=LEAK),
        conv_transpose_brick(2, 1, 64),
        bn_brick(),
        LeakyRectifier(leak=LEAK),
        conv_brick(1, 1, NUM_CHANNELS),
        Logistic()
    ]
    decoder_mapping = ConvolutionalSequence(layers=layers,
                                            num_channels=NLAT,
                                            image_size=(1, 1),
                                            use_bias=False,
                                            name='decoder_mapping')
    decoder = DeterministicConditional(decoder_mapping, name='decoder')

    layers = [
        conv_brick(2, 1, 64),
        LeakyRectifier(leak=LEAK),
        conv_brick(7, 2, 128),
        bn_brick(),
        LeakyRectifier(leak=LEAK),
        conv_brick(5, 2, 256),
        bn_brick(),
        LeakyRectifier(leak=LEAK),
        conv_brick(7, 2, 256),
        bn_brick(),
        LeakyRectifier(leak=LEAK),
        conv_brick(4, 1, 512),
        bn_brick(),
        LeakyRectifier(leak=LEAK)
    ]
    x_discriminator = ConvolutionalSequence(layers=layers,
                                            num_channels=NUM_CHANNELS,
                                            image_size=IMAGE_SIZE,
                                            use_bias=False,
                                            name='x_discriminator')
    x_discriminator.push_allocation_config()

    layers = [
        conv_brick(1, 1, 1024),
        LeakyRectifier(leak=LEAK),
        conv_brick(1, 1, 1024),
        LeakyRectifier(leak=LEAK)
    ]
    z_discriminator = ConvolutionalSequence(layers=layers,
                                            num_channels=NLAT,
                                            image_size=(1, 1),
                                            use_bias=False,
                                            name='z_discriminator')
    z_discriminator.push_allocation_config()

    layers = [
        conv_brick(1, 1, 2048),
        LeakyRectifier(leak=LEAK),
        conv_brick(1, 1, 2048),
        LeakyRectifier(leak=LEAK),
        conv_brick(1, 1, 1)
    ]
    joint_discriminator = ConvolutionalSequence(
        layers=layers,
        num_channels=(x_discriminator.get_dim('output')[0] +
                      z_discriminator.get_dim('output')[0]),
        image_size=(1, 1),
        name='joint_discriminator')

    discriminator = XZJointDiscriminator(x_discriminator,
                                         z_discriminator,
                                         joint_discriminator,
                                         name='discriminator')

    ali = ALI(encoder,
              decoder,
              discriminator,
              weights_init=GAUSSIAN_INIT,
              biases_init=ZERO_INIT,
              name='ali')
    ali.push_allocation_config()
    encoder_mapping.layers[-1].use_bias = True
    encoder_mapping.layers[-1].tied_biases = False
    decoder_mapping.layers[-2].use_bias = True
    decoder_mapping.layers[-2].tied_biases = False
    x_discriminator.layers[0].use_bias = True
    x_discriminator.layers[0].tied_biases = True
    ali.initialize()
    raw_marginals, = next(
        create_celeba_data_streams(500, 500)[0].get_epoch_iterator())
    b_value = get_log_odds(raw_marginals)
    decoder_mapping.layers[-2].b.set_value(b_value)

    return ali
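The last step gives the decoder's output sigmoid a data-dependent bias so that, right after initialization, its mean output matches the pixel marginals of the training set. get_log_odds itself is not shown in this example; a plausible sketch of it (an assumption, not the example's own code) is:

import numpy as np

def get_log_odds(raw_marginals):
    # Mean pixel value per position, clipped away from {0, 1}, mapped
    # through the logit so that sigmoid(bias) reproduces the marginals.
    marginals = np.clip(raw_marginals.mean(axis=0), 1e-7, 1 - 1e-7)
    return np.log(marginals / (1 - marginals)).astype('float32')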
Example #7
    layers = [
        conv_brick(5, 1, 32),
        ConvMaxout(num_pieces=NUM_PIECES),
        conv_brick(4, 2, 64),
        ConvMaxout(num_pieces=NUM_PIECES),
        conv_brick(4, 1, 128),
        ConvMaxout(num_pieces=NUM_PIECES),
        conv_brick(4, 2, 256),
        ConvMaxout(num_pieces=NUM_PIECES),
        conv_brick(4, 1, 512),
        ConvMaxout(num_pieces=NUM_PIECES)
    ]
    x_discriminator = ConvolutionalSequence(layers=layers,
                                            num_channels=NUM_CHANNELS,
                                            image_size=IMAGE_SIZE,
                                            name='x_discriminator')
    x_discriminator.push_allocation_config()

    layers = [
        conv_brick(1, 1, 512),
        ConvMaxout(num_pieces=NUM_PIECES),
        conv_brick(1, 1, 512),
        ConvMaxout(num_pieces=NUM_PIECES)
    ]
    z_discriminator = ConvolutionalSequence(layers=layers,
                                            num_channels=NLAT,
                                            image_size=(1, 1),
                                            use_bias=False,
                                            name='z_discriminator')
    z_discriminator.push_allocation_config()

    layers = [
Example #8
def create_model_brick():
    layers = [
        conv_brick(5, 1, 32), bn_brick(), LeakyRectifier(leak=LEAK),
        conv_brick(4, 2, 64), bn_brick(), LeakyRectifier(leak=LEAK),
        conv_brick(4, 1, 128), bn_brick(), LeakyRectifier(leak=LEAK),
        conv_brick(4, 2, 256), bn_brick(), LeakyRectifier(leak=LEAK),
        conv_brick(4, 1, 512), bn_brick(), LeakyRectifier(leak=LEAK),
        conv_brick(1, 1, 512), bn_brick(), LeakyRectifier(leak=LEAK),
        conv_brick(1, 1, 2 * NLAT)]
    encoder_mapping = ConvolutionalSequence(
        layers=layers, num_channels=NUM_CHANNELS, image_size=IMAGE_SIZE,
        use_bias=False, name='encoder_mapping')
    encoder = GaussianConditional(encoder_mapping, name='encoder')

    layers = [
        conv_transpose_brick(4, 1, 256), bn_brick(), LeakyRectifier(leak=LEAK),
        conv_transpose_brick(4, 2, 128), bn_brick(), LeakyRectifier(leak=LEAK),
        conv_transpose_brick(4, 1, 64), bn_brick(), LeakyRectifier(leak=LEAK),
        conv_transpose_brick(4, 2, 32), bn_brick(), LeakyRectifier(leak=LEAK),
        conv_transpose_brick(5, 1, 32), bn_brick(), LeakyRectifier(leak=LEAK),
        conv_transpose_brick(1, 1, 32), bn_brick(), LeakyRectifier(leak=LEAK),
        conv_brick(1, 1, NUM_CHANNELS), Logistic()]
    decoder_mapping = ConvolutionalSequence(
        layers=layers, num_channels=NLAT, image_size=(1, 1), use_bias=False,
        name='decoder_mapping')
    decoder = DeterministicConditional(decoder_mapping, name='decoder')

    layers = [
        conv_brick(5, 1, 32), ConvMaxout(num_pieces=NUM_PIECES),
        conv_brick(4, 2, 64), ConvMaxout(num_pieces=NUM_PIECES),
        conv_brick(4, 1, 128), ConvMaxout(num_pieces=NUM_PIECES),
        conv_brick(4, 2, 256), ConvMaxout(num_pieces=NUM_PIECES),
        conv_brick(4, 1, 512), ConvMaxout(num_pieces=NUM_PIECES)]
    x_discriminator = ConvolutionalSequence(
        layers=layers, num_channels=NUM_CHANNELS, image_size=IMAGE_SIZE,
        name='x_discriminator')
    x_discriminator.push_allocation_config()

    layers = [
        conv_brick(1, 1, 512), ConvMaxout(num_pieces=NUM_PIECES),
        conv_brick(1, 1, 512), ConvMaxout(num_pieces=NUM_PIECES)]
    z_discriminator = ConvolutionalSequence(
        layers=layers, num_channels=NLAT, image_size=(1, 1), use_bias=False,
        name='z_discriminator')
    z_discriminator.push_allocation_config()

    layers = [
        conv_brick(1, 1, 1024), ConvMaxout(num_pieces=NUM_PIECES),
        conv_brick(1, 1, 1024), ConvMaxout(num_pieces=NUM_PIECES),
        conv_brick(1, 1, 1)]
    joint_discriminator = ConvolutionalSequence(
        layers=layers,
        num_channels=(x_discriminator.get_dim('output')[0] +
                      z_discriminator.get_dim('output')[0]),
        image_size=(1, 1),
        name='joint_discriminator')

    discriminator = XZJointDiscriminator(
        x_discriminator, z_discriminator, joint_discriminator,
        name='discriminator')

    ali = ALI(encoder, decoder, discriminator,
              weights_init=GAUSSIAN_INIT, biases_init=ZERO_INIT,
              name='ali')
    ali.push_allocation_config()
    encoder_mapping.layers[-1].use_bias = True
    encoder_mapping.layers[-1].tied_biases = False
    decoder_mapping.layers[-2].use_bias = True
    decoder_mapping.layers[-2].tied_biases = False
    ali.initialize()
    raw_marginals, = next(
        create_cifar10_data_streams(500, 500)[0].get_epoch_iterator())
    b_value = get_log_odds(raw_marginals)
    decoder_mapping.layers[-2].b.set_value(b_value)

    return ali
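ConvMaxout, used here in place of the batch-norm + LeakyRectifier pairs of the other examples' discriminators, takes an elementwise max over groups of num_pieces consecutive feature maps, dividing the channel count by NUM_PIECES between layers. A minimal numpy sketch of the operation (a hypothetical helper, assuming num_pieces divides the number of channels):

import numpy as np

def conv_maxout(feature_maps, num_pieces):
    # (batch, channels, h, w) -> (batch, channels // num_pieces, h, w)
    b, c, h, w = feature_maps.shape
    grouped = feature_maps.reshape(b, c // num_pieces, num_pieces, h, w)
    return grouped.max(axis=2)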
Example #9
def build_submodel(input_shape,
                   output_dim,
                   L_dim_conv_layers,
                   L_filter_size,
                   L_pool_size,
                   L_activation_conv,
                   L_dim_full_layers,
                   L_activation_full,
                   L_exo_dropout_conv_layers,
                   L_exo_dropout_full_layers,
                   L_endo_dropout_conv_layers,
                   L_endo_dropout_full_layers,
                   L_border_mode=None,
                   L_filter_step=None,
                   L_pool_step=None):


    # TO DO : target size and name of the features

    x = T.tensor4('features')
    y = T.imatrix('targets')

    assert len(input_shape) == 3, "input_shape must have 3 elements: (channels, height, width)"

    num_channels = input_shape[0]
    image_size = tuple(input_shape[1:])
    print image_size
    print num_channels
    prediction = output_dim

    # CONVOLUTION
    output_conv = x
    output_dim = num_channels*np.prod(image_size)
    conv_layers = []
    assert len(L_dim_conv_layers) == len(L_filter_size)
    if L_filter_step is None:
        L_filter_step = [None] * len(L_dim_conv_layers)
    assert len(L_dim_conv_layers) == len(L_pool_size)
    if L_pool_step is None:
        L_pool_step = [None] * len(L_dim_conv_layers)
    assert len(L_dim_conv_layers) == len(L_pool_step)
    assert len(L_dim_conv_layers) == len(L_activation_conv)
    if L_border_mode is None:
        L_border_mode = ["valid"] * len(L_dim_conv_layers)
    assert len(L_dim_conv_layers) == len(L_border_mode)
    assert len(L_dim_conv_layers) == len(L_endo_dropout_conv_layers)
    assert len(L_dim_conv_layers) == len(L_exo_dropout_conv_layers)

    # Regarding the batch (exogenous) dropout: the dropout is applied to the
    # filters, which is equivalent to the output dimension, so you have to
    # look at the dropout rate of the *next* layer. That is why we need the
    # first dropout value of L_exo_dropout_full_layers.

    # The first value has to be 0.0 in this context, and we'll
    # assume that it is, but let's have an assert.
    assert L_exo_dropout_conv_layers[0] == 0.0, (
        "L_exo_dropout_conv_layers[0] has to be 0.0 in this context. There "
        "are ways to make it work, of course, but we don't support this "
        "with this script.")

    # Here we shift L_exo_dropout_conv_layers accordingly.
    L_exo_dropout_conv_layers = (L_exo_dropout_conv_layers[1:] +
                                 [L_exo_dropout_full_layers[0]])
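    # For example, with hypothetical rates [0.0, 0.25, 0.5] for the conv
    # layers and [0.1, ...] for the fully-connected ones, the conv layers
    # end up using [0.25, 0.5, 0.1]: each layer thins its filters by the
    # *next* layer's exogenous dropout rate.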

    if len(L_dim_conv_layers):
        for (num_filters, filter_size, filter_step,
            pool_size, pool_step, activation_str, border_mode,
            dropout, index) in zip(L_dim_conv_layers,
                                  L_filter_size,
                                  L_filter_step,
                                  L_pool_size,
                                  L_pool_step,
                                  L_activation_conv,
                                  L_border_mode,
                                  L_exo_dropout_conv_layers,
                                  xrange(len(L_dim_conv_layers))
                                  ):

            # convert filter_size and pool_size in tuple
            filter_size = tuple(filter_size)

            if filter_step is None:
                filter_step = (1, 1)
            else:
                filter_step = tuple(filter_step)

            if pool_size is None:
                pool_size = (0,0)
            else:
                pool_size = tuple(pool_size)

            # TO DO : leaky relu
            if activation_str.lower() == 'rectifier':
                activation = Rectifier().apply
            elif activation_str.lower() == 'tanh':
                activation = Tanh().apply
            elif activation_str.lower() in ['sigmoid', 'logistic']:
                activation = Logistic().apply
            elif activation_str.lower() in ['id', 'identity']:
                activation = Identity().apply
            else:
                raise Exception("unknown activation function : %s"
                                % activation_str)

            assert 0.0 <= dropout < 1.0
            num_filters = num_filters - int(num_filters*dropout)

            print "border_mode : %s" % border_mode

            # filter_step
            # http://blocks.readthedocs.org/en/latest/api/bricks.html#module-blocks.bricks.conv

            kwargs = {}
            if filter_step != (1, 1):
                # There's a bit of a mix of names because `Convolutional`
                # takes a "step" argument, but `ConvolutionalActivation`
                # takes a "conv_step" argument.
                kwargs['conv_step'] = filter_step

            if (pool_size[0] == 0 and pool_size[1] == 0):
                layer_conv = ConvolutionalActivation(activation=activation,
                                                filter_size=filter_size,
                                                num_filters=num_filters,
                                                border_mode=border_mode,
                                                name="layer_%d" % index,
                                                **kwargs)
            else:
                if pool_step is not None:
                    kwargs['pooling_step'] = tuple(pool_step)

                layer_conv = ConvolutionalLayer(activation=activation,
                                                filter_size=filter_size,
                                                num_filters=num_filters,
                                                border_mode=border_mode,
                                                pooling_size=pool_size,
                                                name="layer_%d" % index,
                                                **kwargs)

            conv_layers.append(layer_conv)

        convnet = ConvolutionalSequence(conv_layers, num_channels=num_channels,
                                    image_size=image_size,
                                    weights_init=Uniform(width=0.1),
                                    biases_init=Constant(0.0),
                                    name="conv_section")
        convnet.push_allocation_config()
        convnet.initialize()
        output_dim = np.prod(convnet.get_dim('output'))
        output_conv = convnet.apply(output_conv)
        


    output_conv = Flattener().apply(output_conv)

    # FULLY CONNECTED
    output_mlp = output_conv
    full_layers = []
    assert len(L_dim_full_layers) == len(L_activation_full)
    assert len(L_dim_full_layers) + 1 == len(L_endo_dropout_full_layers)
    assert len(L_dim_full_layers) + 1 == len(L_exo_dropout_full_layers)

    # Regarding the batch (exogenous) dropout: the dropout is applied to the
    # filters, which is equivalent to the output dimension, so you have to
    # look at the dropout rate of the next layer. That is why we throw away
    # the first value of L_exo_dropout_full_layers.
    L_exo_dropout_full_layers = L_exo_dropout_full_layers[1:]
    pre_dim = output_dim
    print "When constructing the model, the output_dim of the conv section is %d." % output_dim
    if len(L_dim_full_layers):
        for (dim, activation_str,
            dropout, index) in zip(L_dim_full_layers,
                                  L_activation_full,
                                  L_exo_dropout_full_layers,
                                  range(len(L_dim_conv_layers),
                                        len(L_dim_conv_layers)+ 
                                        len(L_dim_full_layers))
                                   ):
                                          
                # TO DO : leaky relu
                if activation_str.lower() == 'rectifier':
                    activation = Rectifier().apply
                elif activation_str.lower() == 'tanh':
                    activation = Tanh().apply
                elif activation_str.lower() in ['sigmoid', 'logistic']:
                    activation = Logistic().apply
                elif activation_str.lower() in ['id', 'identity']:
                    activation = Identity().apply
                else:
                    raise Exception("unknown activation function : %s"
                                    % activation_str)

                assert 0.0 <= dropout < 1.0
                dim = dim - int(dim*dropout)
                print "When constructing the fully-connected section, we apply dropout %f to add an MLP going from pre_dim %d to dim %d." % (dropout, pre_dim, dim)

                layer_full = MLP(activations=[activation], dims=[pre_dim, dim],
                                 weights_init=Uniform(width=0.1),
                                 biases_init=Constant(0.0),
                                name="layer_%d" % index)
                layer_full.initialize()
                full_layers.append(layer_full)
                pre_dim = dim

        for layer in full_layers:
            output_mlp = layer.apply(output_mlp)

        output_dim = L_dim_full_layers[-1] - int(L_dim_full_layers[-1]*L_exo_dropout_full_layers[-1])

    # COST FUNCTION
    output_layer = Linear(output_dim, prediction,
                          weights_init=Uniform(width=0.1),
                          biases_init=Constant(0.0),
                          name="layer_"+str(len(L_dim_conv_layers)+ 
                                            len(L_dim_full_layers))
                          )
    output_layer.initialize()
    full_layers.append(output_layer)
    y_pred = output_layer.apply(output_mlp)
    y_hat = Softmax().apply(y_pred)
    # SOFTMAX and log likelihood
    y_pred = Softmax().apply(y_pred)
    # Be careful: CategoricalCrossEntropy().apply expects the output of a
    # softmax, while Softmax().categorical_cross_entropy expects the raw
    # output of the network.
    cost = CategoricalCrossEntropy().apply(y.flatten(), y_pred)
    #cost = Softmax().categorical_cross_entropy(y.flatten(), y_pred)
    cost.name = "cost"

    # Misclassification
    error_rate_brick = MisclassificationRate()
    error_rate = error_rate_brick.apply(y.flatten(), y_hat)
    error_rate.name = "error_rate"

    # put names

    D_params, D_kind = build_params(x, T.matrix(), conv_layers, full_layers)
    # test computation graph
    

    cg = ComputationGraph(cost)

    # DROPOUT
    L_endo_dropout = L_endo_dropout_conv_layers + L_endo_dropout_full_layers

    cg_dropout = cg
    inputs = VariableFilter(roles=[INPUT])(cg.variables)

    for (index, drop_rate) in enumerate(L_endo_dropout):
        for input_ in inputs:
            m = re.match(r"layer_(\d+)_apply.*", input_.name)
            if m and index == int(m.group(1)):
                if drop_rate < 0.0001:
                    print "Skipped applying dropout on %s because the dropout rate was under 0.0001." % input_.name
                    break
                else:
                    cg_dropout = apply_dropout(cg, [input_], drop_rate)
                    print "Applied dropout %f on %s." % (drop_rate, input_.name)
                    break


    cg = cg_dropout

    return (cg, error_rate, cost, D_params, D_kind)
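A hypothetical invocation, to show how the list arguments line up (all sizes here are illustrative, not from the original script):

cg, error_rate, cost, D_params, D_kind = build_submodel(
    input_shape=(3, 32, 32),
    output_dim=10,
    L_dim_conv_layers=[32, 64],
    L_filter_size=[(5, 5), (5, 5)],
    L_pool_size=[(2, 2), (2, 2)],
    L_activation_conv=['rectifier', 'rectifier'],
    L_dim_full_layers=[256],
    L_activation_full=['rectifier'],
    L_exo_dropout_conv_layers=[0.0, 0.0],   # first value must be 0.0
    L_exo_dropout_full_layers=[0.0, 0.0],   # len(full) + 1 values
    L_endo_dropout_conv_layers=[0.0, 0.0],
    L_endo_dropout_full_layers=[0.5, 0.0])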
Example #10
def create_model_brick():
    # Encoder
    enc_layers = [
        conv_brick(2, 1, 64), bn_brick(), LeakyRectifier(leak=LEAK),
        conv_brick(7, 2, 128), bn_brick(), LeakyRectifier(leak=LEAK),
        conv_brick(5, 2, 256), bn_brick(), LeakyRectifier(leak=LEAK),
        conv_brick(7, 2, 256), bn_brick(), LeakyRectifier(leak=LEAK),
        conv_brick(4, 1, 512), bn_brick(), LeakyRectifier(leak=LEAK),
        conv_brick(1, 1, 2 * NLAT)]

    encoder_mapping = EncoderMapping(layers=enc_layers,
                                     num_channels=NUM_CHANNELS,
                                     n_emb=NEMB,
                                     image_size=IMAGE_SIZE,
                                     weights_init=GAUSSIAN_INIT,
                                     biases_init=ZERO_INIT,
                                     use_bias=False)

    encoder = GaussianConditional(encoder_mapping, name='encoder')
    # Decoder
    dec_layers = [
        conv_transpose_brick(4, 1, 512), bn_brick(), LeakyRectifier(leak=LEAK),
        conv_transpose_brick(7, 2, 256), bn_brick(), LeakyRectifier(leak=LEAK),
        conv_transpose_brick(5, 2, 256), bn_brick(), LeakyRectifier(leak=LEAK),
        conv_transpose_brick(7, 2, 128), bn_brick(), LeakyRectifier(leak=LEAK),
        conv_transpose_brick(2, 1, 64), bn_brick(), LeakyRectifier(leak=LEAK),
        conv_brick(1, 1, NUM_CHANNELS), Logistic()]

    decoder = Decoder(
        layers=dec_layers, num_channels=NLAT + NEMB, image_size=(1, 1), use_bias=False,
        name='decoder_mapping')
    # Discriminator
    layers = [
        conv_brick(2, 1, 64), LeakyRectifier(leak=LEAK),
        conv_brick(7, 2, 128), bn_brick(), LeakyRectifier(leak=LEAK),
        conv_brick(5, 2, 256), bn_brick(), LeakyRectifier(leak=LEAK),
        conv_brick(7, 2, 256), bn_brick(), LeakyRectifier(leak=LEAK),
        conv_brick(4, 1, 512), bn_brick(), LeakyRectifier(leak=LEAK)]
    x_discriminator = ConvolutionalSequence(
        layers=layers, num_channels=NUM_CHANNELS, image_size=IMAGE_SIZE,
        use_bias=False, name='x_discriminator')
    x_discriminator.push_allocation_config()

    layers = [
        conv_brick(1, 1, 1024), LeakyRectifier(leak=LEAK),
        conv_brick(1, 1, 1024), LeakyRectifier(leak=LEAK)]
    z_discriminator = ConvolutionalSequence(
        layers=layers, num_channels=NLAT, image_size=(1, 1), use_bias=False,
        name='z_discriminator')
    z_discriminator.push_allocation_config()

    layers = [
        conv_brick(1, 1, 2048), LeakyRectifier(leak=LEAK),
        conv_brick(1, 1, 2048), LeakyRectifier(leak=LEAK),
        conv_brick(1, 1, 1)]
    joint_discriminator = ConvolutionalSequence(
        layers=layers,
        num_channels=(x_discriminator.get_dim('output')[0] +
                      z_discriminator.get_dim('output')[0] +
                      NEMB),
        image_size=(1, 1),
        name='joint_discriminator')

    discriminator = XZYJointDiscriminator(
        x_discriminator, z_discriminator, joint_discriminator,
        name='discriminator')

    ali = ConditionalALI(encoder, decoder, discriminator,
                         n_cond=NCLASSES, n_emb=NEMB,
                         weights_init=GAUSSIAN_INIT, biases_init=ZERO_INIT,
                         name='ali')
    ali.push_allocation_config()
    encoder_mapping.layers[-1].use_bias = True
    encoder_mapping.layers[-1].tied_biases = False
    decoder.layers[-2].use_bias = True
    decoder.layers[-2].tied_biases = False
    x_discriminator.layers[0].use_bias = True
    x_discriminator.layers[0].tied_biases = True
    ali.initialize()
    raw_marginals, = next(
        create_celeba_data_streams(500, 500)[0].get_epoch_iterator())
    b_value = get_log_odds(raw_marginals)
    decoder.layers[-2].b.set_value(b_value)

    return ali
Example #11
def test_convolutional_layer():
    batch_size = 2
    x = T.tensor4()
    y = T.ivector()
    V = 200
    layer_conv = Convolutional(filter_size=(5, 5), num_filters=V,
                               name="toto",
                               weights_init=IsotropicGaussian(0.01),
                               biases_init=Constant(0.0))
    # try with no bias
    activation = Rectifier()
    pool = MaxPooling(pooling_size=(2, 2))

    convnet = ConvolutionalSequence([layer_conv, activation, pool],
                                    num_channels=15,
                                    image_size=(10, 10),
                                    name="conv_section")
    convnet.push_allocation_config()
    convnet.initialize()
    output = convnet.apply(x)
    batch_size = output.shape[0]
    output_dim = np.prod(convnet.get_dim('output'))
    result_conv = output.reshape((batch_size, output_dim))
    mlp = MLP(activations=[Rectifier().apply], dims=[output_dim, 10],
              weights_init=IsotropicGaussian(0.01),
              biases_init=Constant(0.0))
    mlp.initialize()
    output = mlp.apply(result_conv)
    cost = T.mean(Softmax().categorical_cross_entropy(y.flatten(), output))
    cg = ComputationGraph(cost)
    W = VariableFilter(roles=[WEIGHT])(cg.variables)
    B = VariableFilter(roles=[BIAS])(cg.variables)
    W = W[-1]
    b = B[-1]

    print W.shape.eval()
    print b.shape.eval()
    import pdb
    pdb.set_trace()
    inputs_conv = VariableFilter(roles=[INPUT], bricks=[Convolutional])(cg)
    outputs_conv = VariableFilter(roles=[OUTPUT], bricks=[Convolutional])(cg)
    var_input = inputs_conv[0]
    var_output = outputs_conv[0]

    [d_W, d_S, d_b] = T.grad(cost, [W, var_output, b])

    import pdb
    pdb.set_trace()
    w_shape = W.shape.eval()
    d_W = d_W.reshape((w_shape[0], w_shape[1] * w_shape[2] * w_shape[3]))

    d_b = T.zeros((w_shape[0], 6 * 6))
    #d_b = d_b.reshape((w_shape[0], 8*8))
    d_p = T.concatenate([d_W, d_b], axis=1)
    d_S = d_S.dimshuffle((1, 0, 2, 3)).reshape(
        (w_shape[0], batch_size, 6 * 6)).reshape(
        (w_shape[0], batch_size * 6 * 6))
    #d_S = d_S.reshape((2,200, 64))
    #x_value=1e3*np.random.ranf((1,15,10,10))
    x_value = 1e3 * np.random.ranf((2, 15, 10, 10))
    f = theano.function([x, y], [var_input, d_S, d_W],
                        allow_input_downcast=True, on_unused_input='ignore')
    A, B, C = f(x_value, [5, 5])
    print np.mean(B)
    return

    # Unreachable below the early return; kept for reference.
    E_A = expansion_op(A, (2, 15, 10, 10), (5, 5))
    print E_A.shape
    E_A = E_A.reshape((2 * 36, C.shape[1]))
    print E_A.shape
    tmp = C - np.dot(B, E_A)
    print lin.norm(tmp, 'fro')
Example #12
def build_submodel(image_size,
                   num_channels,
                   L_dim_conv_layers,
                   L_filter_size,
                   L_pool_size,
                   L_activation_conv,
                   L_dim_full_layers,
                   L_activation_full,
                   dropout,
                   prediction,
                   allow_comment=False,
                   sub_dropout=0,
                   L_pool_step=[],
                   L_pool_padding=[]):

    # CONVOLUTION
    params_channels = [10**(-i) for i in range(len(L_dim_conv_layers) + 1)]
    index_params = 0
    params_channels.reverse()
    output_dim = num_channels * np.prod(image_size)
    conv_layers = []
    assert len(L_dim_conv_layers) == len(L_filter_size)
    assert len(L_dim_conv_layers) == len(L_pool_size)
    assert len(L_dim_conv_layers) == len(L_activation_conv)
    if len(L_pool_step) == 0:
        L_pool_step = [(1, 1) for i in range(len(L_dim_conv_layers))]
        L_pool_padding = [(0, 0) for i in range(len(L_dim_conv_layers))]
    assert len(L_dim_conv_layers) == len(L_pool_step)
    assert len(L_dim_conv_layers) == len(L_pool_padding)
    L_conv_dropout = [dropout] * len(
        L_dim_conv_layers)  # unique value of dropout for now
    convnet = None
    mlp = None
    if len(L_dim_conv_layers):
        for (num_filters, filter_size, pool_size, activation_str, dropout,
             index, step, padding) in zip(L_dim_conv_layers, L_filter_size,
                                          L_pool_size, L_activation_conv,
                                          L_conv_dropout,
                                          xrange(len(L_dim_conv_layers)),
                                          L_pool_step, L_pool_padding):

            # convert filter_size and pool_size in tuple
            filter_size = tuple(filter_size)

            if pool_size is None:
                pool_size = (0, 0)
            else:
                pool_size = tuple(pool_size)

            # TO DO : leaky relu
            if activation_str.lower() == 'rectifier':
                activation = Rectifier()
            elif activation_str.lower() == 'tanh':
                activation = Tanh()
            elif activation_str.lower() in ['sigmoid', 'logistic']:
                activation = Logistic()
            elif activation_str.lower() in ['id', 'identity']:
                activation = Identity()
            else:
                raise Exception("unknown activation function : %s"
                                % activation_str)

            assert 0.0 <= dropout < 1.0
            num_filters = num_filters - int(num_filters * dropout)

            layer_conv = Convolutional(filter_size=filter_size,
                                       num_filters=num_filters,
                                       name="layer_%d" % index,
                                       weights_init=IsotropicGaussian(0.01),
                                       biases_init=Constant(0.0))
            conv_layers.append(layer_conv)
            conv_layers.append(activation)
            index_params += 1
            if not (pool_size[0] == 0 and pool_size[1] == 0):
                #pool = MaxPooling(pooling_size=pool_size, step=step, padding=padding)
                pool = MaxPooling(pooling_size=pool_size)
                conv_layers.append(pool)

        convnet = ConvolutionalSequence(conv_layers,
                                        num_channels=num_channels,
                                        image_size=image_size,
                                        name="conv_section")
        convnet.push_allocation_config()
        convnet.initialize()
        output_dim = np.prod(convnet.get_dim('output'))

    # MLP
    assert len(L_dim_full_layers) == len(L_activation_full)
    L_full_dropout = [dropout] * len(
        L_dim_full_layers)  # unique value of dropout for now

    # Regarding the batch (exogenous) dropout: the dropout is applied to the
    # filters, which is equivalent to the output dimension, so you have to
    # look at the dropout rate of the next layer. That is why the first
    # value of L_exo_dropout_full_layers is thrown away.
    pre_dim = output_dim
    if allow_comment:
        print "When constructing the model, the output_dim of the conv section is %d." % output_dim
    activations = []
    dims = [pre_dim]
    if len(L_dim_full_layers):
        for (dim, activation_str, dropout, index) in zip(
                L_dim_full_layers, L_activation_full, L_full_dropout,
                range(len(L_dim_conv_layers),
                      len(L_dim_conv_layers) + len(L_dim_full_layers))):

            # TO DO : leaky relu
            if activation_str.lower() == 'rectifier':
                activation = Rectifier().apply
            elif activation_str.lower() == 'tanh':
                activation = Tanh().apply
            elif activation_str.lower() in ['sigmoid', 'logistic']:
                activation = Logistic().apply
            elif activation_str.lower() in ['id', 'identity']:
                activation = Identity().apply
            else:
                raise Exception("unknown activation function : %s"
                                % activation_str)
            activations.append(activation)
            assert 0.0 <= dropout < 1.0
            dim = dim - int(dim * dropout)
            if allow_comment:
                print "When constructing the fully-connected section, we apply dropout %f to add an MLP going from pre_dim %d to dim %d." % (
                    dropout, pre_dim, dim)
            dims.append(dim)
        #now construct the full MLP in one pass:

    activations.append(Identity())
    #params_channels[index_params]
    dims.append(prediction)
    mlp = MLP(activations=activations,
              dims=dims,
              weights_init=IsotropicGaussian(0.1),
              biases_init=Constant(0.0),
              # Index the output layer past the conv and full layers; the
              # loop variable `index` is undefined when L_dim_full_layers
              # is empty.
              name="layer_%d" % (len(L_dim_conv_layers) +
                                 len(L_dim_full_layers)))
    mlp.push_allocation_config()
    mlp.initialize()
    return (convnet, mlp)
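A sketch of wiring the two returned bricks together (sizes are illustrative):

x = T.tensor4('features')
convnet, mlp = build_submodel(
    image_size=(28, 28), num_channels=1,
    L_dim_conv_layers=[20], L_filter_size=[(5, 5)], L_pool_size=[(2, 2)],
    L_activation_conv=['rectifier'],
    L_dim_full_layers=[100], L_activation_full=['rectifier'],
    dropout=0.0, prediction=10)
h = Flattener().apply(convnet.apply(x))  # flatten conv features for the MLP
y_pred = mlp.apply(h)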
Example #13
    decoder.initialize()
    decoder_fun = function([z, y], decoder.apply(z, embeddings))
    out = decoder_fun(z_hat, test_labels)

    # Discriminator

    layers = [
        conv_brick(5, 1, 32), ConvMaxout(num_pieces=NUM_PIECES),
        conv_brick(4, 2, 64), ConvMaxout(num_pieces=NUM_PIECES),
        conv_brick(4, 1, 128), ConvMaxout(num_pieces=NUM_PIECES),
        conv_brick(4, 2, 256), ConvMaxout(num_pieces=NUM_PIECES),
        conv_brick(4, 1, 512), ConvMaxout(num_pieces=NUM_PIECES)]
    x_discriminator = ConvolutionalSequence(
        layers=layers, num_channels=NUM_CHANNELS, image_size=IMAGE_SIZE,
        name='x_discriminator')
    x_discriminator.push_allocation_config()

    layers = [
        conv_brick(1, 1, 512), ConvMaxout(num_pieces=NUM_PIECES),
        conv_brick(1, 1, 512), ConvMaxout(num_pieces=NUM_PIECES)]
    z_discriminator = ConvolutionalSequence(
        layers=layers, num_channels=NLAT, image_size=(1, 1), use_bias=False,
        name='z_discriminator')
    z_discriminator.push_allocation_config()

    layers = [
        conv_brick(1, 1, 1024), ConvMaxout(num_pieces=NUM_PIECES),
        conv_brick(1, 1, 1024), ConvMaxout(num_pieces=NUM_PIECES),
        conv_brick(1, 1, 1)]
    joint_discriminator = ConvolutionalSequence(
        layers=layers,