def __init__(self, conv_seq=None, mlp=None, **kwargs):
        self.conv_seq = conv_seq
        self.mlp = mlp
        self.flatten = Flattener()
        self.T = 1.

        application_methods = [
            self.conv_seq.apply, self.flatten.apply, self.mlp.apply
        ]
        super(CompositeSequence,
              self).__init__(application_methods=application_methods, **kwargs)
Example #2
class Classifier(Initializable):


    def __init__(self,
                    conv_seq, fully_connected,
                    **kwargs):
        super(Classifier, self).__init__(**kwargs)

        self.conv_seq = conv_seq
        self.flatten = Flattener()
        self.fully_connected = fully_connected
        
        self.children = [self.conv_seq, self.flatten, self.fully_connected]

    def get_dim(self, name):
        if name=="input":
            return self.conv_seq.get_dim(name)
        elif name == 'output':
            return self.fully_connected.get_dim(name)
        else:
            return super(Classifier, self).get_dim(name)

    @application(inputs=['input_'], outputs=['output_'])
    def apply(self, input_):
        output_conv = self.conv_seq.apply(input_)
        input_fully = self.flatten.apply(output_conv)
        output = self.fully_connected.apply(input_fully)
        output.name = "output_"
        return output
Example #3
def create_OLD_kim_cnn(layer0_input, embedding_size, input_len, config, pref):
    '''
        Single convolutional layer, with one branch per filter width in the config.
    '''
    filter_width_list = [
        int(fw) for fw in config[pref + '_filterwidth'].split()
    ]
    print filter_width_list
    num_filters = int(config[pref + '_num_filters'])
    totfilters = 0
    for i, fw in enumerate(filter_width_list):
        num_feature_map = input_len - fw + 1  #39
        conv = Convolutional(filter_size=(fw, embedding_size),
                             num_filters=num_filters,
                             num_channels=1,
                             image_size=(input_len, embedding_size),
                             name="conv" + str(fw))
        pooling = MaxPooling((num_feature_map, 1), name="pool" + str(fw))
        initialize([conv])

        totfilters += num_filters
        outpool = Flattener(name="flat" + str(fw)).apply(
            Rectifier(name=pref + 'act_' + str(fw)).apply(
                pooling.apply(conv.apply(layer0_input))))
        if i == 0:
            outpools = outpool
        else:
            outpools = T.concatenate([outpools, outpool], axis=1)
    name_rep_len = totfilters
    return outpools, name_rep_len
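
# A hedged usage sketch (not part of the original file) showing how
# create_OLD_kim_cnn above might be called. The config keys mirror the ones the
# function reads; the input tensor, embedding size, sentence length and the 'q'
# prefix are illustrative assumptions, and the bricks plus the `initialize`
# helper used inside the function are assumed to be imported by the
# surrounding module.
import theano.tensor as T

config = {'q_filterwidth': '3 4 5', 'q_num_filters': '100'}
layer0_input = T.tensor4('embedded_sentence')  # (batch, 1, input_len, embedding_size)
q_rep, q_rep_len = create_OLD_kim_cnn(layer0_input,
                                      embedding_size=300,
                                      input_len=40,
                                      config=config,
                                      pref='q')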
Example #4
    def __init__(self, **kwargs):
        conv_layers = [
            Convolutional(filter_size=(3, 3), num_filters=64,
                          border_mode=(1, 1), name='conv_1'),
            Rectifier(),
            Convolutional(filter_size=(3, 3), num_filters=64,
                          border_mode=(1, 1), name='conv_2'),
            Rectifier(),
            MaxPooling((2, 2), step=(2, 2), name='pool_2'),

            Convolutional(filter_size=(3, 3), num_filters=128,
                          border_mode=(1, 1), name='conv_3'),
            Rectifier(),
            Convolutional(filter_size=(3, 3), num_filters=128,
                          border_mode=(1, 1), name='conv_4'),
            Rectifier(),
            MaxPooling((2, 2), step=(2, 2), name='pool_4'),

            Convolutional(filter_size=(3, 3), num_filters=256,
                          border_mode=(1, 1), name='conv_5'),
            Rectifier(),
            Convolutional(filter_size=(3, 3), num_filters=256,
                          border_mode=(1, 1), name='conv_6'),
            Rectifier(),
            Convolutional(filter_size=(3, 3), num_filters=256,
                          border_mode=(1, 1), name='conv_7'),
            Rectifier(),
            MaxPooling((2, 2), step=(2, 2), name='pool_7'),

            Convolutional(filter_size=(3, 3), num_filters=512,
                          border_mode=(1, 1), name='conv_8'),
            Rectifier(),
            Convolutional(filter_size=(3, 3), num_filters=512,
                          border_mode=(1, 1), name='conv_9'),
            Rectifier(),
            Convolutional(filter_size=(3, 3), num_filters=512,
                          border_mode=(1, 1), name='conv_10'),
            Rectifier(),
            MaxPooling((2, 2), step=(2, 2), name='pool_10'),

            Convolutional(filter_size=(3, 3), num_filters=512,
                          border_mode=(1, 1), name='conv_11'),
            Rectifier(),
            Convolutional(filter_size=(3, 3), num_filters=512,
                          border_mode=(1, 1), name='conv_12'),
            Rectifier(),
            Convolutional(filter_size=(3, 3), num_filters=512,
                          border_mode=(1, 1), name='conv_13'),
            Rectifier(),
            MaxPooling((2, 2), step=(2, 2), name='pool_13'),
        ]

        mlp = MLP([Rectifier(name='fc_14'), Rectifier(name='fc_15'), Softmax()],
                  [25088, 4096, 4096, 1000],
                  )
        conv_sequence = ConvolutionalSequence(
            conv_layers, 3, image_size=(224, 224))

        super(VGGNet, self).__init__(
            [conv_sequence.apply, Flattener().apply, mlp.apply], **kwargs)
Example #5
def build_model(images, labels):
    
    # Construct a bottom convolutional sequence
    bottom_conv_sequence = convolutional_sequence((3,3), 16, (160, 160))
    bottom_conv_sequence._push_allocation_config()
    
    # Flatten layer
    flattener = Flattener()

    # Construct a top MLP
    conv_out_dim = numpy.prod(bottom_conv_sequence.get_dim('output'))
    #top_mlp = MLP([Rectifier(name='non_linear_9'), Softmax(name='non_linear_11')], [conv_out_dim, 1024, 10], weights_init=IsotropicGaussian(), biases_init=Constant(0))
    top_mlp = BatchNormalizedMLP(
        [Rectifier(name='non_linear_9'), Softmax(name='non_linear_11')],
        [conv_out_dim, 1024, 10],
        weights_init=IsotropicGaussian(),
        biases_init=Constant(0))
    
    # Construct feedforward sequence
    ss_seq = FeedforwardSequence([bottom_conv_sequence.apply, flattener.apply, top_mlp.apply])
    ss_seq.push_initialization_config()
    ss_seq.initialize()
    
    prediction = ss_seq.apply(images)
    cost_noreg = CategoricalCrossEntropy().apply(labels.flatten(), prediction)

    # add regularization
    selector = Selector([top_mlp])
    Ws = selector.get_parameters('W')
    mlp_brick_name = 'batchnormalizedmlp'
    W0 = Ws['/%s/linear_0.W' % mlp_brick_name]
    W1 = Ws['/%s/linear_1.W' % mlp_brick_name]

    cost = cost_noreg + .01 * (W0 ** 2).mean() + .01 * (W1 ** 2).mean()


    return cost
Example #6
def build_model(images, labels):

    # Construct a bottom convolutional sequence
    bottom_conv_sequence = convolutional_sequence((3, 3), 64, (150, 150))
    bottom_conv_sequence._push_allocation_config()

    # Flatten layer
    flattener = Flattener()

    # Construct a top MLP
    conv_out_dim = numpy.prod(bottom_conv_sequence.get_dim('output'))
    top_mlp = MLP([
        LeakyRectifier(name='non_linear_9'),
        LeakyRectifier(name='non_linear_10'),
        Softmax(name='non_linear_11')
    ], [conv_out_dim, 2048, 612, 10],
                  weights_init=IsotropicGaussian(),
                  biases_init=Constant(1))

    # Construct feedforward sequence
    ss_seq = FeedforwardSequence(
        [bottom_conv_sequence.apply, flattener.apply, top_mlp.apply])
    ss_seq.push_initialization_config()
    ss_seq.initialize()

    prediction = ss_seq.apply(images)
    cost = CategoricalCrossEntropy().apply(labels.flatten(), prediction)

    return cost
Example #7
    def __init__(self,
                 conv_activations,
                 num_channels,
                 image_shape,
                 filter_sizes,
                 feature_maps,
                 pooling_sizes,
                 top_mlp_activations,
                 top_mlp_dims,
                 conv_step=None,
                 border_mode='valid',
                 **kwargs):
        if conv_step is None:
            self.conv_step = (1, 1)
        else:
            self.conv_step = conv_step
        self.num_channels = num_channels
        self.image_shape = image_shape
        self.top_mlp_activations = top_mlp_activations
        self.top_mlp_dims = top_mlp_dims
        self.border_mode = border_mode

        conv_parameters = zip(filter_sizes, feature_maps)

        # Construct convolutional, activation, and pooling layers with corresponding parameters
        self.convolution_layer = (
            Convolutional(filter_size=filter_size,
                          num_filters=num_filter,
                          step=self.conv_step,
                          border_mode=self.border_mode,
                          name='conv_{}'.format(i))
            for i, (filter_size, num_filter) in enumerate(conv_parameters))

        self.BN_layer = (BatchNormalization(name='bn_conv_{}'.format(i))
                         for i, _ in enumerate(conv_parameters))

        self.pooling_layer = (MaxPooling(size, name='pool_{}'.format(i))
                              for i, size in enumerate(pooling_sizes))

        self.layers = list(
            interleave([
                self.convolution_layer, self.BN_layer, conv_activations,
                self.pooling_layer
            ]))

        self.conv_sequence = ConvolutionalSequence(self.layers,
                                                   num_channels,
                                                   image_size=image_shape)

        # Construct a top MLP
        self.top_mlp = MLP(top_mlp_activations, top_mlp_dims)

        # We need to flatten the output of the last convolutional layer.
        # This brick accepts a tensor of dimension (batch_size, ...) and
        # returns a matrix (batch_size, features)
        self.flattener = Flattener()
        application_methods = [
            self.conv_sequence.apply, self.flattener.apply, self.top_mlp.apply
        ]
        super(LeNet, self).__init__(application_methods, **kwargs)
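
# A minimal, self-contained sketch (shapes are assumptions, not part of the
# original) of the Flattener behaviour described in the comment above: a
# (batch_size, channels, height, width) tensor becomes a
# (batch_size, channels * height * width) matrix, which is what the top MLP
# expects as its input.
import numpy
import theano
from theano import tensor
from blocks.bricks.conv import Flattener

conv_out = tensor.tensor4('conv_out')
flat = Flattener().apply(conv_out)
f = theano.function([conv_out], flat)
print(f(numpy.zeros((2, 20, 4, 4), dtype=theano.config.floatX)).shape)  # (2, 320)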
Example #8
    def __init__(self, image_dimension, **kwargs):

        layers = []

        #############################################
        # a first block with 2 convolutions of 32 (3, 3) filters
        layers.append(Convolutional((3, 3), 32, border_mode='half'))
        layers.append(Rectifier())
        layers.append(Convolutional((3, 3), 32, border_mode='half'))
        layers.append(Rectifier())

        # maxpool with size=(2, 2)
        layers.append(MaxPooling((2, 2)))

        #############################################
        # a 2nd block with 3 convolutions of 64 (3, 3) filters
        layers.append(Convolutional((3, 3), 64, border_mode='half'))
        layers.append(Rectifier())
        layers.append(Convolutional((3, 3), 64, border_mode='half'))
        layers.append(Rectifier())
        layers.append(Convolutional((3, 3), 64, border_mode='half'))
        layers.append(Rectifier())

        # maxpool with size=(2, 2)
        layers.append(MaxPooling((2, 2)))

        #############################################
        # a 3rd block with 4 convolutions of 128 (3, 3) filters
        layers.append(Convolutional((3, 3), 128, border_mode='half'))
        layers.append(Rectifier())
        layers.append(Convolutional((3, 3), 128, border_mode='half'))
        layers.append(Rectifier())
        layers.append(Convolutional((3, 3), 128, border_mode='half'))
        layers.append(Rectifier())
        layers.append(Convolutional((3, 3), 128, border_mode='half'))
        layers.append(Rectifier())

        # maxpool with size=(2, 2)
        layers.append(MaxPooling((2, 2)))

        self.conv_sequence = ConvolutionalSequence(layers,
                                                   3,
                                                   image_size=image_dimension)

        flattener = Flattener()

        self.top_mlp = MLP(activations=[Rectifier(), Logistic()],
                           dims=[500, 1])

        application_methods = [
            self.conv_sequence.apply, flattener.apply, self.top_mlp.apply
        ]

        super(VGGNet, self).__init__(application_methods,
                                     biases_init=Constant(0),
                                     weights_init=Uniform(width=.1),
                                     **kwargs)
Example #9
    def __init__(self,
                    conv_seq, fully_connected,
                    **kwargs):
        super(Classifier, self).__init__(**kwargs)

        self.conv_seq = conv_seq
        self.flatten = Flattener()
        self.fully_connected = fully_connected
        
        self.children = [self.conv_seq, self.flatten, self.fully_connected]
Example #10
File: train.py  Project: refnil/ift6266h16
def net_dvc(image_size=(32, 32)):
    convos = [5, 5, 5]
    pools = [2, 2, 2]
    filters = [100, 200, 300]

    tuplify = lambda x: (x, x)
    convos = list(map(tuplify, convos))
    conv_layers = [Convolutional(filter_size=s, num_filters=o, num_channels=i,
                                 name="Conv" + str(n))
                   for n, (s, o, i) in enumerate(zip(convos, filters, [3] + filters))]

    pool_layers = [MaxPooling(p) for p in map(tuplify, pools)]

    activations = [Rectifier() for i in convos]

    layers = [i for l in zip(conv_layers, activations, pool_layers) for i in l]

    cnn = ConvolutionalSequence(layers,
                                3,
                                image_size=image_size,
                                name="cnn",
                                weights_init=Uniform(width=.1),
                                biases_init=Constant(0))

    cnn._push_allocation_config()
    cnn_output = np.prod(cnn.get_dim('output'))

    mlp_size = [cnn_output, 500, 2]
    mlp = MLP([Rectifier(), Softmax()],
              mlp_size,
              name="mlp",
              weights_init=Uniform(width=.1),
              biases_init=Constant(0))

    seq = FeedforwardSequence([net.apply for net in [cnn, Flattener(), mlp]])
    seq.push_initialization_config()

    seq.initialize()
    return seq
Example #11
    def __init__(self, image_dimension, **kwargs):

        layers = []

        #############################################
        # a first block with 2 convolutions of 64 (3, 3) filters
        layers.append(
            Convolutional((3, 3), 64, border_mode='half', name='conv_1_1'))
        layers.append(Rectifier())
        layers.append(
            Convolutional((3, 3), 64, border_mode='half', name='conv_1_2'))
        layers.append(Rectifier())

        # maxpool with size=(2, 2)
        layers.append(MaxPooling((2, 2)))

        #############################################
        # a 2nd block with 3 convolutions of 128 (3, 3) filters
        layers.append(
            Convolutional((3, 3), 128, border_mode='half', name='conv_2_1'))
        layers.append(Rectifier())
        layers.append(
            Convolutional((3, 3), 128, border_mode='half', name='conv_2_2'))
        layers.append(Rectifier())

        # maxpool with size=(2, 2)
        layers.append(MaxPooling((2, 2)))

        #############################################
        # a 3rd block with 4 convolutions of 256 (3, 3) filters
        layers.append(
            Convolutional((3, 3), 256, border_mode='half', name='conv_3_1'))
        layers.append(Rectifier())
        layers.append(
            Convolutional((3, 3), 256, border_mode='half', name='conv_3_2'))
        layers.append(Rectifier())
        layers.append(
            Convolutional((3, 3), 256, border_mode='half', name='conv_3_3'))
        layers.append(Rectifier())
        layers.append(
            Convolutional((3, 3), 256, border_mode='half', name='conv_3_4'))
        layers.append(Rectifier())

        # maxpool with size=(2, 2)
        layers.append(MaxPooling((2, 2)))

        #############################################
        # a 4th block with 4 convolutions of 512 (3, 3) filters
        layers.append(
            Convolutional((3, 3), 512, border_mode='half', name='conv_4_1'))
        layers.append(Rectifier())
        layers.append(
            Convolutional((3, 3), 512, border_mode='half', name='conv_4_2'))
        layers.append(Rectifier())
        layers.append(
            Convolutional((3, 3), 512, border_mode='half', name='conv_4_3'))
        layers.append(Rectifier())
        layers.append(
            Convolutional((3, 3), 512, border_mode='half', name='conv_4_4'))
        layers.append(Rectifier())

        # maxpool with size=(2, 2)
        layers.append(MaxPooling((2, 2)))

        #############################################
        # a 5th block with 4 convolutions of 512 (3, 3) filters
        layers.append(
            Convolutional((3, 3), 512, border_mode='half', name='conv_5_1'))
        layers.append(Rectifier())
        layers.append(
            Convolutional((3, 3), 512, border_mode='half', name='conv_5_2'))
        layers.append(Rectifier())
        layers.append(
            Convolutional((3, 3), 512, border_mode='half', name='conv_5_3'))
        layers.append(Rectifier())
        layers.append(
            Convolutional((3, 3), 512, border_mode='half', name='conv_5_4'))
        layers.append(Rectifier())

        # maxpool with size=(2, 2)
        layers.append(MaxPooling((2, 2)))

        self.conv_sequence = ConvolutionalSequence(layers,
                                                   3,
                                                   image_size=image_dimension)

        flattener = Flattener()

        self.top_mlp = BatchNormalizedMLP(
            activations=[Rectifier(),
                         Rectifier(),
                         Rectifier(),
                         Logistic()],
            dims=[4096, 4096, 1000, 1])

        application_methods = [
            self.conv_sequence.apply, flattener.apply, self.top_mlp.apply
        ]

        super(VGGNet, self).__init__(application_methods,
                                     biases_init=Constant(0),
                                     weights_init=Uniform(width=.1),
                                     **kwargs)
Example #12
    def convert_model(self, L):
        """Converts a Matlab model into Blocks. The method expects
        an argument L specifying the layers of the Matlab model,
        e.g. as returned by load_model. It returns a list of bricks.
        This list may be longer than the list of layers in L, because
        additional padding bricks are introduced to work around
        limitations in the Blocks pooling bricks."""

        layers = []
        image_size = self.imagesize

        for i in range(37):  # 37 layers in Matlab model
            l = L[0][i][0][0]
            tp = l["type"][0]
            name = l["name"][0]

            if tp == "conv":
                wt = l["weights"][0, 0]
                bias = l["weights"][0, 1]
                pad = l["pad"][0]
                stride = l["stride"][0]

                # WORK-AROUND to get to 7x7 output after last convolution
                if name == 'conv5_3':
                    pad = [0, 1, 0, 1]

                if sum(pad) > 0:
                    pad = [int(d) for d in pad]
                    layer = Padding(pad)
                    layer.image_size = image_size
                    image_size = layer.get_dim("output")[1:3]
                    layers.append(layer)

                layer, outdim = self.conv_layer(name, wt, bias, image_size)
                layers.append(layer)
                image_size = outdim

            elif tp == "pool":
                method = l["method"][0]
                pool = l["pool"][0]
                stride = l["stride"][0]
                pad = l["pad"][0]

                stride = [int(d) for d in stride]
                pool = [int(d) for d in pool]
                pad = [int(d) for d in pad]

                layer, outdim = self.pool_layer(name, method, pool, pad,
                                                stride, image_size)
                layers.append(layer)
                image_size = outdim

            elif tp == "relu":
                layers.append(self.relu_layer(name))

            elif tp == "softmax":
                layers.append(Flattener(name='flatten'))
                layers.append(self.softmax_layer(name))

        print(len(layers), 'layers created')
        return layers
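
# Hypothetical usage sketch for convert_model above; the `converter` object and
# the .mat file name are assumptions, not part of the snippet.
# L = converter.load_model('imagenet-vgg-verydeep-16.mat')
# bricks = converter.convert_model(L)   # conv/pool/relu bricks plus padding,
#                                       # ending in a Flattener and a softmax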
def run_experiment():

    np.random.seed(42)

    X = tensor.tensor4('features')
    nbr_channels = 3
    image_shape = (5, 5)

    conv_layers = [ ConvolutionalLayer( filter_size=(2,2),
                                        num_filters=10,
                                        activation=Rectifier().apply,
                                        border_mode='valid',
                                        pooling_size=(1,1),
                                        weights_init=Uniform(width=0.1),
                                        #biases_init=Uniform(width=0.01),
                                        biases_init=Constant(0.0),
                                        name='conv0')]
    conv_sequence = ConvolutionalSequence(  conv_layers,
                                            num_channels=nbr_channels,
                                            image_size=image_shape)
    #conv_sequence.push_allocation_config()
    conv_sequence.initialize()
    
    flattener = Flattener()
    conv_output = conv_sequence.apply(X)
    y_hat = flattener.apply(conv_output)
    # Whatever. Not important since we're not going to actually train anything.
    cost = tensor.sqr(y_hat).sum()


    #L_grads_method_02 = [tensor.grad(cost, v) for v in VariableFilter(roles=[FILTER, BIAS])(ComputationGraph([y_hat]).variables)]
    L_grads_method_02 = [tensor.grad(cost, v) for v in VariableFilter(roles=[BIAS])(ComputationGraph([y_hat]).variables)]
    # works on the sum of the gradients in a mini-batch
    sum_square_norm_gradients_method_02 = sum([tensor.sqr(g).sum() for g in L_grads_method_02])


    D_by_layer = get_conv_layers_transformation_roles(ComputationGraph(conv_output))
    individual_sum_square_norm_gradients_method_00 = get_sum_square_norm_gradients_conv_transformations(D_by_layer, cost)


    # why does this thing depend on N again ?
    # I don't think I've used a cost that divides by N.

    N = 2
    Xtrain = np.random.randn(N, nbr_channels, image_shape[0], image_shape[1]).astype(np.float32)
    #Xtrain[1:,:,:,:] = 0.0
    Xtrain[:,:,:,:] = 1.0

    convolution_filter_variable = VariableFilter(roles=[FILTER])(ComputationGraph([y_hat]).variables)[0]
    convolution_filter_variable_value = convolution_filter_variable.get_value()
    convolution_filter_variable_value[:,:,:,:] = 1.0
    #convolution_filter_variable_value[0,0,:,:] = 1.0
    convolution_filter_variable.set_value(convolution_filter_variable_value)

    f = theano.function([X],
                        [cost,
                            individual_sum_square_norm_gradients_method_00,
                            sum_square_norm_gradients_method_02])


    [c, v0, gs2] = f(Xtrain)

    #print "[c, v0, gs2]"
    L_c, L_v0, L_gs2 = ([], [], [])
    for n in range(N):
        [nc, nv0, ngs2] = f(Xtrain[n,:, :, :].reshape((1, Xtrain.shape[1], Xtrain.shape[2], Xtrain.shape[3])))
        L_c.append(nc)
        L_v0.append(nv0)
        L_gs2.append(ngs2)

    print "Cost for whole mini-batch in single shot : %f." % c
    print "Cost for whole mini-batch accumulated    : %f." % sum(L_c)
    print ""
    print "Square-norm of all gradients for each data point in single shot :"
    print v0.reshape((1,-1))
    print "Square-norm of all gradients for each data point iteratively :"
    print np.array(L_gs2).reshape((1,-1))
    print ""
    print "Difference max abs : %f." % np.max(np.abs(v0 - np.array(L_gs2)))
    print ""
    print "Ratios : "
    print np.array(L_gs2).reshape((1,-1)) / v0.reshape((1,-1))
class CompositeSequence(Sequence, Initializable, Feedforward):
    @lazy(allocation=[])
    def __init__(self, conv_seq=None, mlp=None, **kwargs):
        self.conv_seq = conv_seq
        self.mlp = mlp
        self.flatten = Flattener()
        self.T = 1.

        application_methods = [
            self.conv_seq.apply, self.flatten.apply, self.mlp.apply
        ]
        super(CompositeSequence,
              self).__init__(application_methods=application_methods, **kwargs)

    def set_T(self, newT):
        #assert newT >=1.
        self.T = newT

    def confidence(self, input_):
        y_prev = self.apply(input_)
        labels = T.cast(T.argmax(Softmax().apply(y_prev), axis=1), 'uint8')
        return Softmax().categorical_cross_entropy(labels.flatten(), y_prev)

    def error(self, x, y):
        y_pred = Softmax().apply(self.apply(x))
        return MisclassificationRate().apply(y.flatten(), y_pred).mean()

    def predict(self, x):
        return T.argmax(self.probabilities(x), axis=1)

    def probabilities(self, x):
        return Softmax().apply(self.apply(x))

    def _push_allocation_config(self):
        self.conv_seq._push_allocation_config()
        self.mlp._push_allocation_config()
        self.flatten._push_allocation_config()

    def get_dim(self, name):
        if name == 'input_':
            return self.conv_seq.get_dim('input_')
        if name == 'output':
            return self.mlp.output_dim
        #return self.mlp.get_dim('output')
        return super(CompositeSequence, self).get_dim(name)

    def get_Params(self):

        # Warning: in GPU mode the list of parameters can sometimes run from
        # the output layer to the input layer; in that case the list needs to
        # be reversed. To check this we look at the list of parameters of the
        # whole system: if the first weight is a matrix (2-D), reverse it.
        # Returns the weight and bias parameter lists.
        x = T.tensor4()
        cost = self.apply(x).sum()
        cg = ComputationGraph(cost)
        W = VariableFilter(roles=[WEIGHT])(cg.variables)
        B = VariableFilter(roles=[BIAS])(cg.variables)
        if W[0].name[:5] == "layer":
            return W, B

        # find other parameters and retrieve them from the lists
        gamma = []
        beta = []
        index_gamma = []
        index_beta = []
        for w, b, i in zip(W, B, range(len(W))):

            if w.name == "log_gamma":
                index_gamma.append(i)
                gamma.append(w)
            if b.name == "beta":
                index_beta.append(i)
                beta.append(b)

        for i in index_gamma[::-1]:
            W.pop(i)
        for i in index_beta[::-1]:
            B.pop(i)

        if len(W) == 0:
            import pdb
            pdb.set_trace()
        if W[0].ndim == 2:
            W_ = []
            for i in xrange(len(W)):
                W_.append(W[len(W) - 1 - i])
            W = W_
        if B[0].ndim == 1:
            B_ = []
            for i in xrange(len(B)):
                B_.append(B[len(B) - 1 - i])
            B = B_

        # If batch normalization has been introduced, the gamma and beta
        # parameters need to be reinjected artificially so that they fit the
        # actual protocol of dropout.
        if len(gamma) != len(beta):
            raise Exception(
                "gamma and beta parameters should be balanced: (%d, %d)" %
                (len(gamma), len(beta)))

        if len(gamma) != 0:
            if beta[0].shape.eval() != gamma[0].shape.eval():
                beta.reverse()
            W_new = []
            B_new = []
            for w, g in zip(W[:len(gamma)], gamma):
                W_new.append(w)
                W_new.append(g)
            W_new += W[len(gamma):]
            for b, b_ in zip(B[:len(gamma)], beta):
                B_new.append(b)
                B_new.append(b_)
            B_new += B[len(gamma):]
            W = W_new
            B = B_new

        for w, b, index in zip(W, B, range(len(W))):
            w.name = "layer_" + str(index) + "_W"
            b.name = "layer_" + str(index) + "_B"

        return W, B
Example #15
                  biases_init=IsotropicGaussian(std=0.01),
                  use_bias=True,
                  border_mode="valid",
                  step=(1, 1))
l.initialize()
o = l.apply(o)

l = BatchNormalizationConv(input_shape=l.get_dim("output"),
                           B_init=IsotropicGaussian(std=0.01),
                           Y_init=IsotropicGaussian(std=0.01))
l.initialize()
o = l.apply(o)

shape = np.prod(l.get_dim("output"))

o = Flattener().apply(o)

l = Linear(input_dim=shape,
           output_dim=200,
           weights_init=IsotropicGaussian(std=0.01),
           biases_init=IsotropicGaussian(std=0.01))
l.initialize()
o = l.apply(o)

l = BatchNormalization(input_dim=l.get_dim("output"),
                       B_init=IsotropicGaussian(std=0.01),
                       Y_init=IsotropicGaussian(std=0.01))
l.initialize()
o = l.apply(o)

o = Rectifier().apply(o)
Example #16
def build_and_run(label, config):
    ############## CREATE THE NETWORK ###############
    #Define the parameters
    num_epochs = config['num_epochs']
    num_batches = config['num_batches']
    num_channels = config['num_channels']
    image_shape = config['image_shape']
    filter_size = config['filter_size']
    num_filter = config['num_filter']
    pooling_sizes = config['pooling_sizes']
    mlp_hiddens = config['mlp_hiddens']
    output_size = config['output_size']
    batch_size = config['batch_size']
    activation = config['activation']
    mlp_activation = config['mlp_activation']
    #    print(num_epochs, num_channels, image_shape, filter_size, num_filter, pooling_sizes, mlp_hiddens, output_size, batch_size, activation, mlp_activation)
    lambda_l1 = 0.000025
    lambda_l2 = 0.000025

    print("Building model")
    #Create the symbolics variable
    x = T.tensor4('image_features')
    y = T.lmatrix('targets')

    #Get the parameters
    conv_parameters = zip(filter_size, num_filter)

    #Create the convolutions layers
    conv_layers = list(
        interleave([(Convolutional(filter_size=filter_size,
                                   num_filters=num_filter,
                                   name='conv_{}'.format(i))
                     for i, (filter_size,
                             num_filter) in enumerate(conv_parameters)),
                    (activation),
                    (MaxPooling(size, name='pool_{}'.format(i))
                     for i, size in enumerate(pooling_sizes))]))
    #    (AveragePooling(size, name='pool_{}'.format(i)) for i, size in enumerate(pooling_sizes))]))

    #Create the sequence
    conv_sequence = ConvolutionalSequence(conv_layers,
                                          num_channels,
                                          image_size=image_shape,
                                          weights_init=Uniform(width=0.2),
                                          biases_init=Constant(0.))
    #Initialize the convnet
    conv_sequence.initialize()
    #Add the MLP
    top_mlp_dims = [np.prod(conv_sequence.get_dim('output'))
                    ] + mlp_hiddens + [output_size]
    out = Flattener().apply(conv_sequence.apply(x))
    mlp = MLP(mlp_activation,
              top_mlp_dims,
              weights_init=Uniform(0, 0.2),
              biases_init=Constant(0.))
    #Initialisze the MLP
    mlp.initialize()
    #Get the output
    predict = mlp.apply(out)

    cost = CategoricalCrossEntropy().apply(y.flatten(),
                                           predict).copy(name='cost')
    error = MisclassificationRate().apply(y.flatten(), predict)

    #Little trick to plot the error rate in two different plots (we can't use the same data twice in the plot, for an unknown reason)
    error_rate = error.copy(name='error_rate')
    error_rate2 = error.copy(name='error_rate2')

    ########### REGULARIZATION ##################
    cg = ComputationGraph([cost])
    weights = VariableFilter(roles=[WEIGHT])(cg.variables)
    biases = VariableFilter(roles=[BIAS])(cg.variables)
    # # l2_penalty_weights = T.sum([i*lambda_l2/len(weights) * (W ** 2).sum() for i,W in enumerate(weights)]) # Gradually increase penalty for layer
    l2_penalty = T.sum([
        lambda_l2 * (W**2).sum() for i, W in enumerate(weights + biases)
    ])  # Gradually increase penalty for layer
    # # #l2_penalty_bias = T.sum([lambda_l2*(B **2).sum() for B in biases])
    # # #l2_penalty = l2_penalty_weights + l2_penalty_bias
    l2_penalty.name = 'l2_penalty'
    l1_penalty = T.sum([lambda_l1 * T.abs_(z).sum() for z in weights + biases])
    #  l1_penalty_weights = T.sum([i*lambda_l1/len(weights) * T.abs_(W).sum() for i,W in enumerate(weights)]) # Gradually increase penalty for layer
    #  l1_penalty_biases = T.sum([lambda_l1 * T.abs_(B).sum() for B in biases])
    #  l1_penalty = l1_penalty_biases + l1_penalty_weights
    l1_penalty.name = 'l1_penalty'
    costreg = cost + l2_penalty + l1_penalty
    costreg.name = 'costreg'

    ########### DEFINE THE ALGORITHM #############
    #  algorithm = GradientDescent(cost=cost, parameters=cg.parameters, step_rule=Momentum())
    algorithm = GradientDescent(cost=costreg,
                                parameters=cg.parameters,
                                step_rule=Adam())

    ########### GET THE DATA #####################
    istest = 'test' in config.keys()
    train_stream, valid_stream, test_stream = get_stream(batch_size,
                                                         image_shape,
                                                         test=istest)

    ########### INITIALIZING EXTENSIONS ##########
    checkpoint = Checkpoint('models/best_' + label + '.tar')
    checkpoint.add_condition(
        ['after_epoch'], predicate=OnLogRecord('valid_error_rate_best_so_far'))
    #Adding a live plot with the bokeh server
    plot = Plot(
        label,
        channels=[
            ['train_error_rate', 'valid_error_rate'],
            ['valid_cost', 'valid_error_rate2'],
            # ['train_costreg','train_grad_norm']], #
            [
                'train_costreg', 'train_total_gradient_norm',
                'train_l2_penalty', 'train_l1_penalty'
            ]
        ],
        server_url="http://hades.calculquebec.ca:5042")

    grad_norm = aggregation.mean(algorithm.total_gradient_norm)
    grad_norm.name = 'grad_norm'

    extensions = [
        Timing(),
        FinishAfter(after_n_epochs=num_epochs, after_n_batches=num_batches),
        DataStreamMonitoring([cost, error_rate, error_rate2],
                             valid_stream,
                             prefix="valid"),
        TrainingDataMonitoring([
            costreg, error_rate, error_rate2, grad_norm, l2_penalty, l1_penalty
        ],
                               prefix="train",
                               after_epoch=True),
        plot,
        ProgressBar(),
        Printing(),
        TrackTheBest('valid_error_rate', min),  #Keep best
        checkpoint,  #Save best
        FinishIfNoImprovementAfter('valid_error_rate_best_so_far', epochs=4)
    ]  # Early-stopping
    model = Model(cost)
    main_loop = MainLoop(algorithm,
                         data_stream=train_stream,
                         model=model,
                         extensions=extensions)
    main_loop.run()
Example #17
    b.ConvolutionalLayer(activation,
                         filter_size,
                         num_filters_,
                         pooling_size,
                         num_channels=3) for filter_size, num_filters_,
    pooling_size in zip(filter_sizes, num_filters, pooling_sizes)
]

convnet = ConvolutionalSequence(conv_layers,
                                num_channels=3,
                                image_size=(32, 32),
                                weights_init=Uniform(0, 0.2),
                                biases_init=Constant(0.))

convnet.initialize()
conv_features = Flattener().apply(convnet.apply(X))

# MLP

mlp = MLP(activations=[Logistic(name='sigmoid_0'),
                       Softmax(name='softmax_1')],
          dims=[256, 256, 2],
          weights_init=IsotropicGaussian(0.01),
          biases_init=Constant(0))
[child.name for child in mlp.children]
['linear_0', 'sigmoid_0', 'linear_1', 'softmax_1']
Y = mlp.apply(conv_features)
mlp.initialize()

# Setting up the cost function
from blocks.bricks.cost import CategoricalCrossEntropy
def run_experiment():

    np.random.seed(42)

    X = tensor.tensor4('features')
    nbr_channels = 3
    image_shape = (5, 5)

    conv_layers = [
        ConvolutionalLayer(
            filter_size=(2, 2),
            num_filters=10,
            activation=Rectifier().apply,
            border_mode='valid',
            pooling_size=(1, 1),
            weights_init=Uniform(width=0.1),
            #biases_init=Uniform(width=0.01),
            biases_init=Constant(0.0),
            name='conv0')
    ]
    conv_sequence = ConvolutionalSequence(conv_layers,
                                          num_channels=nbr_channels,
                                          image_size=image_shape)
    #conv_sequence.push_allocation_config()
    conv_sequence.initialize()

    flattener = Flattener()
    conv_output = conv_sequence.apply(X)
    y_hat = flattener.apply(conv_output)
    # Whatever. Not important since we're not going to actually train anything.
    cost = tensor.sqr(y_hat).sum()

    #L_grads_method_02 = [tensor.grad(cost, v) for v in VariableFilter(roles=[FILTER, BIAS])(ComputationGraph([y_hat]).variables)]
    L_grads_method_02 = [
        tensor.grad(cost, v) for v in VariableFilter(
            roles=[BIAS])(ComputationGraph([y_hat]).variables)
    ]
    # works on the sum of the gradients in a mini-batch
    sum_square_norm_gradients_method_02 = sum(
        [tensor.sqr(g).sum() for g in L_grads_method_02])

    D_by_layer = get_conv_layers_transformation_roles(
        ComputationGraph(conv_output))
    individual_sum_square_norm_gradients_method_00 = get_sum_square_norm_gradients_conv_transformations(
        D_by_layer, cost)

    # why does this thing depend on N again ?
    # I don't think I've used a cost that divides by N.

    N = 2
    Xtrain = np.random.randn(N, nbr_channels, image_shape[0],
                             image_shape[1]).astype(np.float32)
    #Xtrain[1:,:,:,:] = 0.0
    Xtrain[:, :, :, :] = 1.0

    convolution_filter_variable = VariableFilter(roles=[FILTER])(
        ComputationGraph([y_hat]).variables)[0]
    convolution_filter_variable_value = convolution_filter_variable.get_value()
    convolution_filter_variable_value[:, :, :, :] = 1.0
    #convolution_filter_variable_value[0,0,:,:] = 1.0
    convolution_filter_variable.set_value(convolution_filter_variable_value)

    f = theano.function([X], [
        cost, individual_sum_square_norm_gradients_method_00,
        sum_square_norm_gradients_method_02
    ])

    [c, v0, gs2] = f(Xtrain)

    #print "[c, v0, gs2]"
    L_c, L_v0, L_gs2 = ([], [], [])
    for n in range(N):
        [nc, nv0, ngs2] = f(Xtrain[n, :, :, :].reshape(
            (1, Xtrain.shape[1], Xtrain.shape[2], Xtrain.shape[3])))
        L_c.append(nc)
        L_v0.append(nv0)
        L_gs2.append(ngs2)

    print "Cost for whole mini-batch in single shot : %f." % c
    print "Cost for whole mini-batch accumulated    : %f." % sum(L_c)
    print ""
    print "Square-norm of all gradients for each data point in single shot :"
    print v0.reshape((1, -1))
    print "Square-norm of all gradients for each data point iteratively :"
    print np.array(L_gs2).reshape((1, -1))
    print ""
    print "Difference max abs : %f." % np.max(np.abs(v0 - np.array(L_gs2)))
    print ""
    print "Ratios : "
    print np.array(L_gs2).reshape((1, -1)) / v0.reshape((1, -1))
                  num_filters=num_filter[j + 1],
                  step=conv_step,
                  border_mode=border_mode,
                  name='conv_{}'.format(i)))
conv_layers4.append(BatchNormalization(name='BNconv_{}'.format(i)))
conv_layers4.append(conv_activation[0])
conv_layers4.append(MaxPooling(pooling_size[j + 1], name='pool_{}'.format(i)))

conv_sequence4 = ConvolutionalSequence(conv_layers4,
                                       num_channels=num_channels,
                                       image_size=image_size,
                                       weights_init=Uniform(width=0.2),
                                       biases_init=Constant(0.),
                                       name='ConvSeq_{}'.format(i))
conv_sequence4.initialize()
out = Flattener().apply(conv_sequence4.apply(out))

################# Final MLP layers #################
# MLP parameters
mlp_hiddens = 1000

top_mlp_dims = [numpy.prod(conv_sequence4.get_dim('output'))
                ] + [mlp_hiddens] + [output_size]
top_mlp = MLP(mlp_activation,
              top_mlp_dims,
              weights_init=Uniform(width=0.2),
              biases_init=Constant(0.))
top_mlp.initialize()

probs = top_mlp.apply(out)
cost = CategoricalCrossEntropy(name='Cross1').apply(y.flatten(),
Example #20
    ConvolutionalLayer(Rectifier().apply, (3, 3), 32, (2, 2), name='l2')
]

convnet = ConvolutionalSequence(conv_layers,
                                num_channels=1,
                                image_size=(28, 28),
                                weights_init=IsotropicGaussian(0.1),
                                biases_init=Constant(0))

convnet.initialize()

output_dim = np.prod(convnet.get_dim('output'))
print(output_dim)

# Fully connected layers
features = Flattener().apply(convnet.apply(x))

mlp = MLP(activations=[Rectifier(), None],
          dims=[output_dim, 100, 10],
          weights_init=IsotropicGaussian(0.01),
          biases_init=Constant(0))
mlp.initialize()

y_hat = mlp.apply(features)

# numerically stable softmax
cost = Softmax().categorical_cross_entropy(y.flatten(), y_hat)
cost.name = 'nll'
error_rate = MisclassificationRate().apply(y.flatten(), y_hat)
#cost = MisclassificationRate().apply(y, y_hat)
#cost.name = 'error_rate'
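
# For contrast, a hedged sketch (not from the original file) of the less
# numerically stable formulation: applying Softmax explicitly and then
# CategoricalCrossEntropy on the resulting probabilities, as done in some of
# the other examples in this collection (CategoricalCrossEntropy is assumed
# to be imported alongside the other bricks).
probs = Softmax().apply(y_hat)
cost_unstable = CategoricalCrossEntropy().apply(y.flatten(), probs)
cost_unstable.name = 'nll_unstable'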
Example #21
    Rectifier(),
    MaxPooling((2, 2), name='MaxPol1'),
    Convolutional(filter_size=(1, 1), num_filters=1024, name='Convx3'),
    Rectifier(),
    MaxPooling((2, 2), name='MaxPol2'),
    Convolutional(filter_size=(1, 1), num_filters=2, name='Convx4'),
    Rectifier(),
])
conv_sequence1 = ConvolutionalSequence(conv_layers1,
                                       num_channels=512,
                                       image_size=(10, 10),
                                       weights_init=Orthogonal(),
                                       use_bias=False,
                                       name='ConvSeq3')
conv_sequence1.initialize()
out_soft1 = Flattener(name='Flatt1').apply(conv_sequence1.apply(out5))
predict1 = NDimensionalSoftmax(name='Soft1').apply(out_soft1)
cost1 = CategoricalCrossEntropy(name='Cross1').apply(
    y.flatten(), predict1).copy(name='cost1')

#SECOND SOFTMAX
conv_layers2 = list([
    MaxPooling((2, 2), name='MaxPol2'),
    Convolutional(filter_size=(1, 1), num_filters=128, name='Convx21'),
    Rectifier(),
    MaxPooling((2, 2), name='MaxPol11'),
    Convolutional(filter_size=(1, 1), num_filters=1024, name='Convx31'),
    Rectifier(),
    MaxPooling((2, 2), name='MaxPol21'),
    Convolutional(filter_size=(1, 1), num_filters=2, name='Convx41'),
    Rectifier(),
Example #22
def build_model(images, labels):

    vgg = VGG(layer='conv3_4')
    vgg.push_initialization_config()
    vgg.initialize()

    sb = SubstractBatch()

    # Construct a bottom convolutional sequence
    layers = [
        Convolutional(filter_size=(3, 3),
                      num_filters=100,
                      use_bias=True,
                      tied_biases=True,
                      name='final_conv0'),
        BatchNormalization(name='batchnorm_1'),
        Rectifier(name='final_conv0_act'),
        Convolutional(filter_size=(3, 3),
                      num_filters=100,
                      use_bias=True,
                      tied_biases=True,
                      name='final_conv1'),
        BatchNormalization(name='batchnorm_2'),
        Rectifier(name='final_conv1_act'),
        MaxPooling(pooling_size=(2, 2), name='maxpool_final')
    ]
    bottom_conv_sequence = ConvolutionalSequence(
        layers,
        num_channels=256,
        image_size=(40, 40),
        biases_init=Constant(0.),
        weights_init=IsotropicGaussian(0.01))
    bottom_conv_sequence._push_allocation_config()

    # Flatten layer
    flattener = Flattener()

    # Construct a top MLP
    conv_out_dim = numpy.prod(bottom_conv_sequence.get_dim('output'))
    print 'dim output conv:', bottom_conv_sequence.get_dim('output')
    # conv_out_dim = 20 * 40 * 40
    top_mlp = BatchNormalizedMLP(
        [Rectifier(name='non_linear_9'),
         Softmax(name='non_linear_11')], [conv_out_dim, 1024, 10],
        weights_init=IsotropicGaussian(),
        biases_init=Constant(0))

    # Construct feedforward sequence
    ss_seq = FeedforwardSequence([
        vgg.apply, bottom_conv_sequence.apply, flattener.apply, top_mlp.apply
    ])
    ss_seq.push_initialization_config()
    ss_seq.initialize()

    prediction = ss_seq.apply(images)
    cost_noreg = CategoricalCrossEntropy().apply(labels.flatten(), prediction)

    # add regularization
    selector = Selector([top_mlp])
    Ws = selector.get_parameters('W')
    mlp_brick_name = 'batchnormalizedmlp'
    W0 = Ws['/%s/linear_0.W' % mlp_brick_name]
    W1 = Ws['/%s/linear_1.W' % mlp_brick_name]
    cost = cost_noreg + .0001 * (W0**2).sum() + .001 * (W1**2).sum()

    # define learned parameters
    selector = Selector([ss_seq])
    Ws = selector.get_parameters('W')
    bs = selector.get_parameters('b')
    BNSCs = selector.get_parameters('batch_norm_scale')
    BNSHs = selector.get_parameters('batch_norm_shift')

    parameters_top = []
    parameters_top += [v for k, v in Ws.items()]
    parameters_top += [v for k, v in bs.items()]
    parameters_top += [v for k, v in BNSCs.items()]
    parameters_top += [v for k, v in BNSHs.items()]

    selector = Selector([vgg])
    convs = selector.get_parameters()

    parameters_all = []
    parameters_all += parameters_top
    parameters_all += [v for k, v in convs.items()]

    return cost, [parameters_top, parameters_all]
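
# A hedged sketch (not from the original file) of how the two parameter lists
# returned above might be used: train the newly added top layers first, then
# fine-tune everything including the VGG convolutions. The symbolic variables
# and the Adam step rule are assumptions; build_model itself relies on the
# VGG/SubstractBatch bricks defined in the surrounding module.
import theano.tensor as tensor
from blocks.algorithms import GradientDescent, Adam

images = tensor.tensor4('image_features')
labels = tensor.lmatrix('targets')
cost, (parameters_top, parameters_all) = build_model(images, labels)

algorithm_top = GradientDescent(cost=cost, parameters=parameters_top,
                                step_rule=Adam())
algorithm_all = GradientDescent(cost=cost, parameters=parameters_all,
                                step_rule=Adam())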
Example #23
out2 = Fire((55,55), 128, 16, 16, 16, out1, 25)
out3 = Fire((55,55), 128, 32, 32, 32, out2, 300)
out31 = MaxPooling((3,3), step=(2,2), padding=(1,1), name='poolLow').apply(out3)
out4 = Fire((28,28), 256, 32, 32, 32, out31, 45)
out5 = Fire((28,28), 256, 48, 48, 48, out4, 500)
out6 = Fire((28,28), 384, 48, 48, 48, out5, 65)
out7 = Fire((28,28), 384, 64, 64, 64, out6, 700)
out71 = MaxPooling((3,3), step=(2,2), padding=(1,1), name='poolLow2').apply(out7)
out8 = Fire((14,14), 512, 64, 64, 64, out71, 85)

#LAST LAYERS
conv_layers1 = list([Convolutional(filter_size=(1, 1), num_filters=2, name='Convx2'),
                     BatchNormalization(name='batch_vx2'),
                     Rectifier(),
                     AveragePooling((14, 14), name='MaxPol1')])
conv_sequence1 = ConvolutionalSequence(conv_layers1, num_channels=512,
                                       image_size=(14, 14),
                                       weights_init=Orthogonal(),
                                       use_bias=False, name='ConvSeq3')
conv_sequence1.initialize()
out_soft1 = Flattener(name='Flatt1').apply(conv_sequence1.apply(out8))
predict1 = NDimensionalSoftmax(name='Soft1').apply(out_soft1)
cost = CategoricalCrossEntropy(name='Cross1').apply(y.flatten(), predict1).copy(name='cost')
error = MisclassificationRate().apply(y.flatten(), predict1)

#Little trick to plot the error rate in two different plots (we can't use the same data twice in the plot, for an unknown reason)
error_rate = error.copy(name='error_rate')
error_rate2 = error.copy(name='error_rate2')
cg = ComputationGraph([cost, error_rate])

########### GET THE DATA #####################
stream_train = ServerDataStream(('image_features','targets'), False, port=5512, hwm=40)
stream_valid = ServerDataStream(('image_features','targets'), False, port=5513, hwm=40)

########### DEFINE THE ALGORITHM #############
algorithm = GradientDescent(cost=cost, parameters=cg.parameters, step_rule=Momentum(learning_rate=0.01, momentum=0.9))
Example #24
# Theano variables
x = tensor.tensor4('image_features')
y = tensor.lmatrix('targets')


### setting up "clean" and "dirty" images with data_preprocessing
if mode == ("GPU_run"):
    try:
        x1 = data_preprocessing1(x).copy(name='x_clean')
        x2 = data_preprocessing2(x).copy(name='x_dirty')
        out1 = conv_sequence2.apply(x1)
        out2 = conv_sequence2.apply(x2)


### Flattening data
conv_out1 = Flattener(name='flattener1').apply(out1)
conv_out2 = Flattener(name='flattener2').apply(out2)
conv_out = tensor.concatenate([conv_out1,conv_out2],axis=1)

### MLP
mlp_hiddens = 1000
top_mlp_dims = [numpy.prod(conv_sequence1.get_dim('output'))+numpy.prod(conv_sequence1.get_dim('output'))] + [mlp_hiddens] + [output_size]
top_mlp = MLP(mlp_activation, top_mlp_dims,weights_init=Uniform(width=0.2),biases_init=Constant(0.))
top_mlp.initialize()



### Getting the data

from fuel.datasets.dogs_vs_cats import DogsVsCats
from fuel.streams import DataStream, ServerDataStream
def run_experiment():

    np.random.seed(42)

    #X = tensor.matrix('features')
    X = tensor.tensor4('features')
    y = tensor.matrix('targets')
    nbr_channels = 3
    image_shape = (30, 30)

    conv_layers = [ ConvolutionalLayer( filter_size=(4,4),
                                        num_filters=10,
                                        activation=Rectifier().apply,
                                        border_mode='full',
                                        pooling_size=(1,1),
                                        weights_init=Uniform(width=0.1),
                                        biases_init=Constant(0.0),
                                        name='conv0'),
                    ConvolutionalLayer( filter_size=(3,3),
                                        num_filters=14,
                                        activation=Rectifier().apply,
                                        border_mode='full',
                                        pooling_size=(1,1),
                                        weights_init=Uniform(width=0.1),
                                        biases_init=Constant(0.0),
                                        name='conv1')]
    conv_sequence = ConvolutionalSequence(  conv_layers,
                                            num_channels=nbr_channels,
                                            image_size=image_shape)
    #conv_sequence.push_allocation_config()
    conv_sequence.initialize()
    conv_output_dim = np.prod(conv_sequence.get_dim('output'))
    #conv_output_dim = 25*25

    flattener = Flattener()

    mlp = MLP(  activations=[Rectifier(), Rectifier(), Softmax()],
                dims=[conv_output_dim, 50, 50, 10],
                weights_init=IsotropicGaussian(std=0.1), biases_init=IsotropicGaussian(std=0.01))
    mlp.initialize()

    conv_output = conv_sequence.apply(X)
    y_hat = mlp.apply(flattener.apply(conv_output))

    cost = CategoricalCrossEntropy().apply(y, y_hat)
    #cost = CategoricalCrossEntropy().apply(y_hat, y)
    #cost = BinaryCrossEntropy().apply(y.flatten(), y_hat.flatten())

    cg = ComputationGraph([y_hat])
    
    """
    print "--- INPUT ---"
    for v in VariableFilter(bricks=mlp.linear_transformations, roles=[INPUT])(cg.variables):
        print v.tag.annotations[0].name

    print "--- OUTPUT ---"
    #print(VariableFilter(bricks=mlp.linear_transformations, roles=[OUTPUT])(cg.variables))
    for v in VariableFilter(bricks=mlp.linear_transformations, roles=[OUTPUT])(cg.variables):
        print v.tag.annotations[0].name

    print "--- WEIGHT ---"
    #print(VariableFilter(bricks=mlp.linear_transformations, roles=[WEIGHT])(cg.variables))
    for v in VariableFilter(bricks=mlp.linear_transformations, roles=[WEIGHT])(cg.variables):
        print v.tag.annotations[0].name
    print "--- BIAS ---"
    #print(VariableFilter(bricks=mlp.linear_transformations, roles=[BIAS])(cg.variables))
    for v in VariableFilter(bricks=mlp.linear_transformations, roles=[BIAS])(cg.variables):
        print v.tag.annotations[0].name
    """

    # check out .tag on the variables to see which layer they belong to

    print "----------------------------"


    D_by_layer = get_linear_transformation_roles(mlp, cg)

    # returns a vector with one entry for each in the mini-batch
    individual_sum_square_norm_gradients_method_00 = get_sum_square_norm_gradients_linear_transformations(D_by_layer, cost)

    #import pprint
    #pp = pprint.PrettyPrinter(indent=4)
    #pp.pprint(get_conv_layers_transformation_roles(ComputationGraph(conv_output)).items())

    D_by_layer = get_conv_layers_transformation_roles(ComputationGraph(conv_output))
    individual_sum_square_norm_gradients_method_00 += get_sum_square_norm_gradients_conv_transformations(D_by_layer, cost)



    print "There are %d entries in cg.parameters." % len(cg.parameters)
    L_grads_method_01 = [tensor.grad(cost, p) for p in cg.parameters]
    L_grads_method_02 = [tensor.grad(cost, v) for v in VariableFilter(roles=[WEIGHT, BIAS])(cg.variables)]

    # works on the sum of the gradients in a mini-batch
    sum_square_norm_gradients_method_01 = sum([tensor.sqr(g).sum() for g in L_grads_method_01])
    sum_square_norm_gradients_method_02 = sum([tensor.sqr(g).sum() for g in L_grads_method_02])

    N = 8
    Xtrain = np.random.randn(N, nbr_channels, image_shape[0], image_shape[1]).astype(np.float32)

    # Option 1.
    ytrain = np.zeros((N, 10), dtype=np.float32)
    for n in range(N):
        label = np.random.randint(low=0, high=10)
        ytrain[n, label] = 1.0

    # Option 2, just to debug situations with NaN.
    #ytrain = np.random.rand(N, 10).astype(np.float32)
    #for n in range(N):
    #    ytrain[n,:] = ytrain[n,:] / ytrain[n,:].sum()


    f = theano.function([X,y],
                        [cost,
                            individual_sum_square_norm_gradients_method_00,
                            sum_square_norm_gradients_method_01,
                            sum_square_norm_gradients_method_02])

    [c, v0, gs1, gs2] = f(Xtrain, ytrain)

    #print "[c, v0, gs1, gs2]"
    L_c, L_v0, L_gs1, L_gs2 = ([], [], [], [])
    for n in range(N):
        [nc, nv0, ngs1, ngs2] = f(Xtrain[n,:].reshape((1,Xtrain.shape[1],Xtrain.shape[2], Xtrain.shape[3])), ytrain[n,:].reshape((1,10)))
        L_c.append(nc)
        L_v0.append(nv0)
        L_gs1.append(ngs1)
        L_gs2.append(ngs2)

    print "Cost for whole mini-batch in single shot : %f." % c
    print "Cost for whole mini-batch accumulated    : %f." % sum(L_c)
    print ""
    print "Square-norm of all gradients for each data point in single shot :"
    print v0.reshape((1,-1))
    print "Square-norm of all gradients for each data point iteratively :"
    print np.array(L_gs1).reshape((1,-1))
    print "Square-norm of all gradients for each data point iteratively :"
    print np.array(L_gs2).reshape((1,-1))
    print ""
    print "Difference max abs : %f." % np.max(np.abs(v0 - np.array(L_gs1)))
    print "Difference max abs : %f." % np.max(np.abs(v0 - np.array(L_gs2)))
    print ""
    print "Ratios : "
    print np.array(L_gs1).reshape((1,-1)) / v0.reshape((1,-1))
Example #26
                         num_filter) in enumerate(conv_parameters)),
                (MaxPooling(size, name='pool_{}'.format(i))
                 for i, size in enumerate(pooling_sizes))]))

#Create the sequence
conv_sequence = ConvolutionalSequence(conv_layers,
                                      num_channels,
                                      image_size=image_shape,
                                      weights_init=Uniform(width=0.2),
                                      biases_init=Constant(0.))
#Initialize the convnet
conv_sequence.initialize()
#Add the MLP
top_mlp_dims = [np.prod(conv_sequence.get_dim('output'))
                ] + mlp_hiddens + [output_size]
out = Flattener().apply(conv_sequence.apply(x))
mlp = MLP(mlp_activation,
          top_mlp_dims,
          weights_init=Uniform(0, 0.2),
          biases_init=Constant(0.))
#Initialisze the MLP
mlp.initialize()
#Get the output
predict = mlp.apply(out)

cost = CategoricalCrossEntropy().apply(y.flatten(), predict).copy(name='cost')
error = MisclassificationRate().apply(y.flatten(), predict)
#Little trick to plot the error rate in two different plots (we can't use the same data twice in the plot, for an unknown reason)
error_rate = error.copy(name='error_rate')
error_rate2 = error.copy(name='error_rate2')
cg = ComputationGraph([cost, error_rate])
Example #27
def run_experiment():

    np.random.seed(42)

    #X = tensor.matrix('features')
    X = tensor.tensor4('features')
    y = tensor.matrix('targets')
    nbr_channels = 3
    image_shape = (30, 30)

    conv_layers = [
        ConvolutionalLayer(filter_size=(4, 4),
                           num_filters=10,
                           activation=Rectifier().apply,
                           border_mode='full',
                           pooling_size=(1, 1),
                           weights_init=Uniform(width=0.1),
                           biases_init=Constant(0.0),
                           name='conv0'),
        ConvolutionalLayer(filter_size=(3, 3),
                           num_filters=14,
                           activation=Rectifier().apply,
                           border_mode='full',
                           pooling_size=(1, 1),
                           weights_init=Uniform(width=0.1),
                           biases_init=Constant(0.0),
                           name='conv1')
    ]
    conv_sequence = ConvolutionalSequence(conv_layers,
                                          num_channels=nbr_channels,
                                          image_size=image_shape)
    #conv_sequence.push_allocation_config()
    conv_sequence.initialize()
    conv_output_dim = np.prod(conv_sequence.get_dim('output'))
    #conv_output_dim = 25*25

    flattener = Flattener()

    mlp = MLP(activations=[Rectifier(), Rectifier(),
                           Softmax()],
              dims=[conv_output_dim, 50, 50, 10],
              weights_init=IsotropicGaussian(std=0.1),
              biases_init=IsotropicGaussian(std=0.01))
    mlp.initialize()

    conv_output = conv_sequence.apply(X)
    y_hat = mlp.apply(flattener.apply(conv_output))

    cost = CategoricalCrossEntropy().apply(y, y_hat)
    #cost = CategoricalCrossEntropy().apply(y_hat, y)
    #cost = BinaryCrossEntropy().apply(y.flatten(), y_hat.flatten())

    cg = ComputationGraph([y_hat])
    """
    print "--- INPUT ---"
    for v in VariableFilter(bricks=mlp.linear_transformations, roles=[INPUT])(cg.variables):
        print v.tag.annotations[0].name

    print "--- OUTPUT ---"
    #print(VariableFilter(bricks=mlp.linear_transformations, roles=[OUTPUT])(cg.variables))
    for v in VariableFilter(bricks=mlp.linear_transformations, roles=[OUTPUT])(cg.variables):
        print v.tag.annotations[0].name

    print "--- WEIGHT ---"
    #print(VariableFilter(bricks=mlp.linear_transformations, roles=[WEIGHT])(cg.variables))
    for v in VariableFilter(bricks=mlp.linear_transformations, roles=[WEIGHT])(cg.variables):
        print v.tag.annotations[0].name
    print "--- BIAS ---"
    #print(VariableFilter(bricks=mlp.linear_transformations, roles=[BIAS])(cg.variables))
    for v in VariableFilter(bricks=mlp.linear_transformations, roles=[BIAS])(cg.variables):
        print v.tag.annotations[0].name
    """

    # check out .tag on the variables to see which layer they belong to

    print "----------------------------"

    D_by_layer = get_linear_transformation_roles(mlp, cg)

    # returns a vector with one entry for each in the mini-batch
    individual_sum_square_norm_gradients_method_00 = get_sum_square_norm_gradients_linear_transformations(
        D_by_layer, cost)

    #import pprint
    #pp = pprint.PrettyPrinter(indent=4)
    #pp.pprint(get_conv_layers_transformation_roles(ComputationGraph(conv_output)).items())

    D_by_layer = get_conv_layers_transformation_roles(
        ComputationGraph(conv_output))
    individual_sum_square_norm_gradients_method_00 += get_sum_square_norm_gradients_conv_transformations(
        D_by_layer, cost)

    print "There are %d entries in cg.parameters." % len(cg.parameters)
    L_grads_method_01 = [tensor.grad(cost, p) for p in cg.parameters]
    L_grads_method_02 = [
        tensor.grad(cost, v)
        for v in VariableFilter(roles=[WEIGHT, BIAS])(cg.variables)
    ]

    # works on the sum of the gradients in a mini-batch
    sum_square_norm_gradients_method_01 = sum(
        [tensor.sqr(g).sum() for g in L_grads_method_01])
    sum_square_norm_gradients_method_02 = sum(
        [tensor.sqr(g).sum() for g in L_grads_method_02])

    N = 8
    Xtrain = np.random.randn(N, nbr_channels, image_shape[0],
                             image_shape[1]).astype(np.float32)

    # Option 1.
    ytrain = np.zeros((N, 10), dtype=np.float32)
    for n in range(N):
        label = np.random.randint(low=0, high=10)
        ytrain[n, label] = 1.0

    # Option 2, just to debug situations with NaN.
    #ytrain = np.random.rand(N, 10).astype(np.float32)
    #for n in range(N):
    #    ytrain[n,:] = ytrain[n,:] / ytrain[n,:].sum()

    f = theano.function([X, y], [
        cost, individual_sum_square_norm_gradients_method_00,
        sum_square_norm_gradients_method_01,
        sum_square_norm_gradients_method_02
    ])

    [c, v0, gs1, gs2] = f(Xtrain, ytrain)

    #print "[c, v0, gs1, gs2]"
    L_c, L_v0, L_gs1, L_gs2 = ([], [], [], [])
    for n in range(N):
        [nc, nv0, ngs1, ngs2] = f(
            Xtrain[n, :].reshape(
                (1, Xtrain.shape[1], Xtrain.shape[2], Xtrain.shape[3])),
            ytrain[n, :].reshape((1, 10)))
        L_c.append(nc)
        L_v0.append(nv0)
        L_gs1.append(ngs1)
        L_gs2.append(ngs2)

    print "Cost for whole mini-batch in single shot : %f." % c
    print "Cost for whole mini-batch accumulated    : %f." % sum(L_c)
    print ""
    print "Square-norm of all gradients for each data point in single shot :"
    print v0.reshape((1, -1))
    print "Square-norm of all gradients for each data point iteratively :"
    print np.array(L_gs1).reshape((1, -1))
    print "Square-norm of all gradients for each data point iteratively :"
    print np.array(L_gs2).reshape((1, -1))
    print ""
    print "Difference max abs : %f." % np.max(np.abs(v0 - np.array(L_gs1)))
    print "Difference max abs : %f." % np.max(np.abs(v0 - np.array(L_gs2)))
    print ""
    print "Ratios : "
    print np.array(L_gs1).reshape((1, -1)) / v0.reshape((1, -1))