# Assumed imports for the examples below (Theano plus the Blocks ~0.1-era
# API, in which ConvolutionalLayer still exists).  The
# get_*_transformation_roles and get_sum_square_norm_gradients_* helpers
# are project-specific and assumed to be defined elsewhere in this repo.
import numpy as np
import theano
from theano import tensor

from blocks.bricks import MLP, Initializable, Rectifier, Softmax
from blocks.bricks.base import application
from blocks.bricks.conv import (ConvolutionalLayer, ConvolutionalSequence,
                                Flattener)
from blocks.bricks.cost import CategoricalCrossEntropy
from blocks.filter import VariableFilter
from blocks.graph import ComputationGraph
from blocks.initialization import Constant, IsotropicGaussian, Uniform
from blocks.roles import BIAS, FILTER, INPUT, OUTPUT, WEIGHT


class Classifier(Initializable):

    def __init__(self, conv_seq, fully_connected, **kwargs):
        super(Classifier, self).__init__(**kwargs)

        self.conv_seq = conv_seq
        self.flatten = Flattener()
        self.fully_connected = fully_connected
        
        self.children = [self.conv_seq, self.flatten, self.fully_connected]

    def get_dim(self, name):
        if name == 'input':
            return self.conv_seq.get_dim(name)
        elif name == 'output':
            return self.fully_connected.get_dim(name)
        else:
            return super(Classifier, self).get_dim(name)

    @application(inputs=['input_'], outputs=['output_'])
    def apply(self, input_):
        output_conv = self.conv_seq.apply(input_)
        input_fully = self.flatten.apply(output_conv)
        output = self.fully_connected.apply(input_fully)
        output.name = "output_"
        return output
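

def demo_classifier():
    # Hedged usage sketch for the Classifier brick above; the experiments
    # below never exercise it, so the input shape and layer sizes here are
    # illustrative assumptions, not taken from the original code.
    conv_seq = ConvolutionalSequence(
        [ConvolutionalLayer(filter_size=(3, 3),
                            num_filters=8,
                            activation=Rectifier().apply,
                            pooling_size=(2, 2),
                            name='conv0')],
        num_channels=1, image_size=(28, 28))
    # 28x28 input, 3x3 'valid' convolution -> 26x26, 2x2 pooling -> 13x13;
    # with 8 filters the flattened dimension is 8 * 13 * 13.
    fully_connected = MLP(activations=[Softmax()], dims=[8 * 13 * 13, 10])
    clf = Classifier(conv_seq, fully_connected,
                     weights_init=Uniform(width=0.1),
                     biases_init=Constant(0.0))
    clf.initialize()
    return clf.apply(tensor.tensor4('features'))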


def run_experiment_conv_only():

    np.random.seed(42)

    X = tensor.tensor4('features')
    nbr_channels = 3
    image_shape = (5, 5)

    conv_layers = [
        ConvolutionalLayer(
            filter_size=(2, 2),
            num_filters=10,
            activation=Rectifier().apply,
            border_mode='valid',
            pooling_size=(1, 1),
            weights_init=Uniform(width=0.1),
            #biases_init=Uniform(width=0.01),
            biases_init=Constant(0.0),
            name='conv0')
    ]
    conv_sequence = ConvolutionalSequence(conv_layers,
                                          num_channels=nbr_channels,
                                          image_size=image_shape)
    #conv_sequence.push_allocation_config()
    conv_sequence.initialize()

    flattener = Flattener()
    conv_output = conv_sequence.apply(X)
    y_hat = flattener.apply(conv_output)
    # The exact cost is unimportant: we only need its gradients, not to
    # actually train anything.
    cost = tensor.sqr(y_hat).sum()

    #L_grads_method_02 = [tensor.grad(cost, v) for v in VariableFilter(roles=[FILTER, BIAS])(ComputationGraph([y_hat]).variables)]
    L_grads_method_02 = [
        tensor.grad(cost, v) for v in VariableFilter(
            roles=[BIAS])(ComputationGraph([y_hat]).variables)
    ]
    # Note: this is the squared norm of the gradient summed over the
    # mini-batch, i.e. ||sum_n g_n||^2, not the per-example quantity
    # sum_n ||g_n||^2 that method_00 computes below.
    sum_square_norm_gradients_method_02 = sum(
        [tensor.sqr(g).sum() for g in L_grads_method_02])

    D_by_layer = get_conv_layers_transformation_roles(
        ComputationGraph(conv_output))
    individual_sum_square_norm_gradients_method_00 = get_sum_square_norm_gradients_conv_transformations(
        D_by_layer, cost)
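    # method_00 is the per-example quantity: a vector with one entry per
    # example, each the squared norm of that example's own gradient
    # (presumably computed from the layer inputs and output gradients, so
    # that N separate backprop passes are not needed).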

    # Note that the cost is a plain sum over the mini-batch (no 1/N
    # factor), so the batch gradient is the sum of the per-example
    # gradients, and ||sum_n g_n||^2 scales with N even though nothing
    # divides by N.

    N = 2
    Xtrain = np.random.randn(N, nbr_channels, image_shape[0],
                             image_shape[1]).astype(np.float32)
    #Xtrain[1:,:,:,:] = 0.0
    Xtrain[:, :, :, :] = 1.0

    convolution_filter_variable = VariableFilter(roles=[FILTER])(
        ComputationGraph([y_hat]).variables)[0]
    convolution_filter_variable_value = convolution_filter_variable.get_value()
    convolution_filter_variable_value[:, :, :, :] = 1.0
    #convolution_filter_variable_value[0,0,:,:] = 1.0
    convolution_filter_variable.set_value(convolution_filter_variable_value)
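
    # With all-ones filters and all-ones inputs, every example produces an
    # identical gradient, so the expected relationship between the
    # batch-summed norm and the per-example norms can be checked by hand.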

    f = theano.function([X], [
        cost, individual_sum_square_norm_gradients_method_00,
        sum_square_norm_gradients_method_02
    ])

    [c, v0, gs2] = f(Xtrain)

    #print "[c, v0, gs2]"
    L_c, L_v0, L_gs2 = ([], [], [])
    for n in range(N):
        [nc, nv0, ngs2] = f(Xtrain[n, :, :, :].reshape(
            (1, Xtrain.shape[1], Xtrain.shape[2], Xtrain.shape[3])))
        L_c.append(nc)
        L_v0.append(nv0)
        L_gs2.append(ngs2)

    print "Cost for whole mini-batch in single shot : %f." % c
    print "Cost for whole mini-batch accumulated    : %f." % sum(L_c)
    print ""
    print "Square-norm of all gradients for each data point in single shot :"
    print v0.reshape((1, -1))
    print "Square-norm of all gradients for each data point iteratively :"
    print np.array(L_gs2).reshape((1, -1))
    print ""
    print "Difference max abs : %f." % np.max(np.abs(v0 - np.array(L_gs2)))
    print ""
    print "Ratios : "
    print np.array(L_gs2).reshape((1, -1)) / v0.reshape((1, -1))
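

def check_per_example_grad_norm_trick():
    # Hedged numpy illustration (not from the original code) of the trick
    # that the get_sum_square_norm_gradients_* helpers presumably
    # implement, following Goodfellow's per-example gradient-norm idea:
    # for a linear layer h = x.dot(W) inside a cost summed over the
    # mini-batch, the gradient contributed by example n is
    # outer(x_n, delta_n) with delta_n = d cost / d h_n, so its squared
    # Frobenius norm factorizes as ||x_n||^2 * ||delta_n||^2.
    rng = np.random.RandomState(0)
    N, d_in, d_out = 4, 6, 3
    x = rng.randn(N, d_in)        # layer inputs, one row per example
    delta = rng.randn(N, d_out)   # stand-in backpropagated signal

    # Explicit per-example gradients: one outer product per example.
    explicit = np.array(
        [np.sum(np.outer(x[n], delta[n]) ** 2) for n in range(N)])
    # The trick: a product of row-wise squared norms, no outer products.
    factorized = np.sum(x ** 2, axis=1) * np.sum(delta ** 2, axis=1)

    assert np.allclose(explicit, factorized)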


def run_experiment_conv_mlp():

    np.random.seed(42)

    #X = tensor.matrix('features')
    X = tensor.tensor4('features')
    y = tensor.matrix('targets')
    nbr_channels = 3
    image_shape = (30, 30)

    conv_layers = [
        ConvolutionalLayer(filter_size=(4, 4),
                           num_filters=10,
                           activation=Rectifier().apply,
                           border_mode='full',
                           pooling_size=(1, 1),
                           weights_init=Uniform(width=0.1),
                           biases_init=Constant(0.0),
                           name='conv0'),
        ConvolutionalLayer(filter_size=(3, 3),
                           num_filters=14,
                           activation=Rectifier().apply,
                           border_mode='full',
                           pooling_size=(1, 1),
                           weights_init=Uniform(width=0.1),
                           biases_init=Constant(0.0),
                           name='conv1')
    ]
    conv_sequence = ConvolutionalSequence(conv_layers,
                                          num_channels=nbr_channels,
                                          image_size=image_shape)
    #conv_sequence.push_allocation_config()
    conv_sequence.initialize()
    conv_output_dim = np.prod(conv_sequence.get_dim('output'))
    #conv_output_dim = 25*25

    flattener = Flattener()

    mlp = MLP(activations=[Rectifier(), Rectifier(),
                           Softmax()],
              dims=[conv_output_dim, 50, 50, 10],
              weights_init=IsotropicGaussian(std=0.1),
              biases_init=IsotropicGaussian(std=0.01))
    mlp.initialize()

    conv_output = conv_sequence.apply(X)
    y_hat = mlp.apply(flattener.apply(conv_output))

    cost = CategoricalCrossEntropy().apply(y, y_hat)
    #cost = CategoricalCrossEntropy().apply(y_hat, y)
    #cost = BinaryCrossEntropy().apply(y.flatten(), y_hat.flatten())

    cg = ComputationGraph([y_hat])
    """
    print "--- INPUT ---"
    for v in VariableFilter(bricks=mlp.linear_transformations, roles=[INPUT])(cg.variables):
        print v.tag.annotations[0].name

    print "--- OUTPUT ---"
    #print(VariableFilter(bricks=mlp.linear_transformations, roles=[OUTPUT])(cg.variables))
    for v in VariableFilter(bricks=mlp.linear_transformations, roles=[OUTPUT])(cg.variables):
        print v.tag.annotations[0].name

    print "--- WEIGHT ---"
    #print(VariableFilter(bricks=mlp.linear_transformations, roles=[WEIGHT])(cg.variables))
    for v in VariableFilter(bricks=mlp.linear_transformations, roles=[WEIGHT])(cg.variables):
        print v.tag.annotations[0].name
    print "--- BIAS ---"
    #print(VariableFilter(bricks=mlp.linear_transformations, roles=[BIAS])(cg.variables))
    for v in VariableFilter(bricks=mlp.linear_transformations, roles=[BIAS])(cg.variables):
        print v.tag.annotations[0].name
    """

    # check out .tag on the variables to see which layer they belong to

    print "----------------------------"

    D_by_layer = get_linear_transformation_roles(mlp, cg)

    # returns a vector with one entry for each example in the mini-batch
    individual_sum_square_norm_gradients_method_00 = get_sum_square_norm_gradients_linear_transformations(
        D_by_layer, cost)

    #import pprint
    #pp = pprint.PrettyPrinter(indent=4)
    #pp.pprint(get_conv_layers_transformation_roles(ComputationGraph(conv_output)).items())

    D_by_layer = get_conv_layers_transformation_roles(
        ComputationGraph(conv_output))
    individual_sum_square_norm_gradients_method_00 += get_sum_square_norm_gradients_conv_transformations(
        D_by_layer, cost)
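    # Per-example squared norms add across layers: the full per-example
    # gradient is the concatenation of the per-layer gradients, so
    # ||g_n||^2 = sum over layers of ||g_n^(layer)||^2, which is why the
    # linear and convolutional contributions can simply be summed.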

    print "There are %d entries in cg.parameters." % len(cg.parameters)
    L_grads_method_01 = [tensor.grad(cost, p) for p in cg.parameters]
    L_grads_method_02 = [
        tensor.grad(cost, v)
        for v in VariableFilter(roles=[WEIGHT, BIAS])(cg.variables)
    ]

    # Note: these are squared norms of the gradient summed over the
    # mini-batch, ||sum_n g_n||^2, not the per-example sum_n ||g_n||^2
    # that method_00 computes.
    sum_square_norm_gradients_method_01 = sum(
        [tensor.sqr(g).sum() for g in L_grads_method_01])
    sum_square_norm_gradients_method_02 = sum(
        [tensor.sqr(g).sum() for g in L_grads_method_02])

    N = 8
    Xtrain = np.random.randn(N, nbr_channels, image_shape[0],
                             image_shape[1]).astype(np.float32)

    # Option 1: one-hot target vectors.
    ytrain = np.zeros((N, 10), dtype=np.float32)
    for n in range(N):
        label = np.random.randint(low=0, high=10)
        ytrain[n, label] = 1.0

    # Option 2, just to debug situations with NaN.
    #ytrain = np.random.rand(N, 10).astype(np.float32)
    #for n in range(N):
    #    ytrain[n,:] = ytrain[n,:] / ytrain[n,:].sum()

    f = theano.function([X, y], [
        cost, individual_sum_square_norm_gradients_method_00,
        sum_square_norm_gradients_method_01,
        sum_square_norm_gradients_method_02
    ])

    [c, v0, gs1, gs2] = f(Xtrain, ytrain)

    #print "[c, v0, gs1, gs2]"
    L_c, L_v0, L_gs1, L_gs2 = ([], [], [], [])
    for n in range(N):
        [nc, nv0, ngs1, ngs2] = f(
            Xtrain[n, :].reshape(
                (1, Xtrain.shape[1], Xtrain.shape[2], Xtrain.shape[3])),
            ytrain[n, :].reshape((1, 10)))
        L_c.append(nc)
        L_v0.append(nv0)
        L_gs1.append(ngs1)
        L_gs2.append(ngs2)

    print "Cost for whole mini-batch in single shot : %f." % c
    print "Cost for whole mini-batch accumulated    : %f." % sum(L_c)
    print ""
    print "Square-norm of all gradients for each data point in single shot :"
    print v0.reshape((1, -1))
    print "Square-norm of all gradients for each data point iteratively :"
    print np.array(L_gs1).reshape((1, -1))
    print "Square-norm of all gradients for each data point iteratively :"
    print np.array(L_gs2).reshape((1, -1))
    print ""
    print "Difference max abs : %f." % np.max(np.abs(v0 - np.array(L_gs1)))
    print "Difference max abs : %f." % np.max(np.abs(v0 - np.array(L_gs2)))
    print ""
    print "Ratios : "
    print np.array(L_gs1).reshape((1, -1)) / v0.reshape((1, -1))
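

if __name__ == '__main__':
    # Assumed entry point so the module runs both checks when executed
    # directly; not part of the original snippets.
    run_experiment_conv_only()
    run_experiment_conv_mlp()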