Ejemplo n.º 1
0
    def __init__(self,
                 input,
                 n_in_maps,
                 n_out_maps,
                 kernel_shape,
                 video_shape,
                 batch_size,
                 activation,
                 layer_name="Conv",
                 rng=RandomState(1234),
                 borrow=True,
                 W=None,
                 b=None):
        """
        Build a 3D convolution over `input`, add a per-map bias and apply
        `activation`; the result is stored in `self.output`.

        input: symbolic 5D tensor; axes 1 and 2 are swapped before the
            convolution and swapped back afterwards
        n_in_maps: number of input feature maps
        n_out_maps: number of output feature maps (also the bias length)
        kernel_shape: (frames, height, width)
        video_shape: (frames, height, width)
        batch_size: first entry of the signal shape handed to conv3d
        activation: callable applied to the biased convolution output
        layer_name: prefix for the names of the shared variables
        rng: generator used to draw the initial weights; the default
            instance is created once, when the method is defined, and is
            therefore shared by every layer that relies on it
        borrow: forwarded to shared() for both W and b
        W: optional initial filter values (drawn from `rng` when None)
        b: optional initial bias values (constant-filled when None)

        W_shape: (out, in, kern_frames, kern_height, kern_width)
        """

        # Expose every constructor argument as an attribute of the same name.
        # Must stay the first statement so that no extra local is captured.
        self.__dict__.update(locals())
        del self.self

        # init W
        # Identity test: `W != None` compares element-wise when W is an
        # array, and the resulting array cannot be used as an `if` condition.
        if W is not None:
            W_val = W
        else:
            # fan in: filter time x filter height x filter width x input maps
            fan_in = prod(kernel_shape) * n_in_maps
            norm_scale = 2. * sqrt(1. / fan_in)
            if activation in (relu, softplus):
                # rectifier-like units use a fixed small standard deviation
                norm_scale = 0.01
            # tuple() lets callers pass kernel_shape as a list as well
            W_shape = (n_out_maps, n_in_maps) + tuple(kernel_shape)
            W_val = _asarray(rng.normal(loc=0, scale=norm_scale, size=W_shape),
                             dtype=floatX)
        self.W = shared(value=W_val, borrow=borrow, name=layer_name + '_W')
        self.params = [self.W]

        # init bias: ones for relu/softplus, zeros for anything else
        if b is not None:
            b_val = b
        elif activation in (relu, softplus):
            b_val = ones((n_out_maps, ), dtype=floatX)
        else:
            b_val = zeros((n_out_maps, ), dtype=floatX)
        self.b = shared(b_val, name=layer_name + "_b", borrow=borrow)
        self.params.append(self.b)

        # 3D convolution; dimshuffle: last 3 dimensions must be (in, h, w)
        n_fr, h, w = video_shape
        n_fr_k, h_k, w_k = kernel_shape
        # NOTE(review): W is created as (out, in, frames, h, w) but is handed
        # to conv3d, without a dimshuffle, under a filters_shape of
        # (out, frames, in, h, w) -- confirm this layout is intended.
        out = conv3d(signals=input.dimshuffle([0, 2, 1, 3, 4]),
                     filters=self.W,
                     signals_shape=(batch_size, n_fr, n_in_maps, h, w),
                     filters_shape=(n_out_maps, n_fr_k, n_in_maps, h_k, w_k),
                     border_mode='valid').dimshuffle([0, 2, 1, 3, 4])

        # broadcast the bias over every axis except the map axis (axis 1)
        out += self.b.dimshuffle('x', 0, 'x', 'x', 'x')

        self.output = activation(out)
Ejemplo n.º 2
0
        def __init__(self, rng, input, filter_shape, image_shape,W_init,b_init,sparse_count,softmax = 0):
            """
            Build a 3D convolution layer whose kernel taps are spaced
            `sparse_count` zeros apart, followed by a softplus.

            rng: generator used for the uniform weight initialisation
            input: symbolic 5D tensor; axes 1 and 2 are swapped before the
                convolution and swapped back afterwards
            filter_shape: (out maps, in maps, k0, k1, k2) of the dense kernel;
                the stored kernel is enlarged to (1 + sparse_count) * k -
                sparse_count along each of the last three axes
            image_shape: 5-entry shape of `input`; entry 1 must equal
                filter_shape[1]
            W_init: initial filter values, or None to draw them from `rng`
            b_init: initial bias values, or None for zeros
            sparse_count: number of zeros inserted between neighbouring taps
            softmax: when truthy, also build `p_y_given_x` and `y_pred`
            """
            assert image_shape[1] == filter_shape[1]
            self.input = input

            # Uniform bound derived from the dense (un-dilated) filter shape.
            fan_in = numpy.prod(filter_shape[1:])
            fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]))
            W_bound = numpy.sqrt(6. / (fan_in + fan_out))

            # Per-axis 0/1 pattern: a one, `sparse_count` zeros, repeated, and
            # closed by a final one.
            oneZeros = numpy.concatenate(([1], numpy.zeros(sparse_count)))
            period = len(oneZeros)
            x, y, z = [
                numpy.insert(numpy.tile(oneZeros, taps - 1),
                             (taps - 1) * period, 1)
                for taps in (filter_shape[2], filter_shape[3], filter_shape[4])
            ]
            # The outer product of the three patterns marks the active taps.
            mask = numpy.outer(numpy.outer(x, y), z).reshape(len(x), len(y), len(z))

            # From here on filter_shape describes the enlarged kernel.
            filter_shape = (filter_shape[0],
                            filter_shape[1],
                            (1 + sparse_count) * filter_shape[2] - sparse_count,
                            (1 + sparse_count) * filter_shape[3] - sparse_count,
                            (1 + sparse_count) * filter_shape[4] - sparse_count)
            self.Wmask = (numpy.ones(filter_shape) * mask).astype(theano.config.floatX)

            # Identity tests: `array != None` compares element-wise and the
            # result cannot be used as an `if` condition.
            if W_init is not None:
                W_values = W_init
            else:
                W_values = numpy.asarray(
                    rng.uniform(low=-W_bound, high=W_bound, size=filter_shape) * self.Wmask,
                    dtype=theano.config.floatX)
            self.W = theano.shared(value=W_values, borrow=True)

            if b_init is not None:
                b_values = b_init
            else:
                b_values = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX)
            self.b = theano.shared(value=b_values, borrow=True)
            self.bmask = numpy.ones((filter_shape[0],), dtype=theano.config.floatX)

            # Signals, filters and both shape descriptions get the same
            # axis-1/axis-2 swap before being handed to conv3d.
            conv_out = conv3d(
                signals=input.dimshuffle([0, 2, 1, 3, 4]),
                filters=self.W.dimshuffle([0, 2, 1, 3, 4]),
                signals_shape=[image_shape[i] for i in [0, 2, 1, 3, 4]],
                filters_shape=[filter_shape[i] for i in [0, 2, 1, 3, 4]],
                border_mode='valid'
            ).dimshuffle([0, 2, 1, 3, 4])
            conv_out += self.b.dimshuffle('x', 0, 'x', 'x', 'x')

            # Extent of a 'valid' convolution along the last three axes.
            self.outputlen = (image_shape[2] - filter_shape[2] + 1,
                              image_shape[3] - filter_shape[3] + 1,
                              image_shape[4] - filter_shape[4] + 1)
            self.output = T.nnet.softplus(conv_out)
            self.params = [self.W, self.b]
            self.masks = [self.Wmask, self.bmask]
            self.num_points = T.prod(self.outputlen)

            # initial shape = 1,3,img_shape
            if (softmax):
                # The reshape drops the leading axis, so it only works when
                # that axis has length 1. Rows are output points, columns are
                # maps; the softmax is taken on the pre-softplus values.
                out = conv_out.reshape([conv_out.shape[1], self.num_points]).dimshuffle(1, 0)
                self.p_y_given_x = T.nnet.softmax(out)
                self.y_pred = T.argmax(self.p_y_given_x, axis=1)
Ejemplo n.º 3
0
    def __init__(self, input, n_in_maps, n_out_maps, kernel_shape, video_shape, 
        batch_size,numpy_rng,activation, W=None, b=None,border_mode = 'valid'):

        """
        Build a 3D convolution over `input`, add a per-map bias and apply
        `activation`; also allocate zero-filled `delta_W` / `delta_b`.

        input: symbolic 5D tensor; axes 1 and 2 are swapped before the
            convolution and swapped back afterwards
        n_in_maps: number of input feature maps
        n_out_maps: number of output feature maps (also the bias length)
        kernel_shape: (frames, height, width)
        video_shape: (frames, height, width)
        batch_size: first entry of the signal shape handed to conv3d
        numpy_rng: generator used to draw the initial weights
        activation: callable applied to the biased convolution output
        W: optional shared variable holding the filters; created when None
        b: optional shared variable holding the bias; created when None
        border_mode: forwarded to conv3d

        W_shape: (out, in, kern_frames, kern_height, kern_width)
        """

        # Expose every constructor argument as an attribute of the same name.
        # Must stay the first statement so that no extra local is captured.
        self.__dict__.update(locals())
        del self.self
        self.activation = activation

        # list() lets callers pass kernel_shape as a tuple as well as a list
        param_shape = [n_out_maps, n_in_maps] + list(kernel_shape)

        # init W
        if W is None:
            # fan in: filter time x filter height x filter width x input maps
            fan_in = prod(kernel_shape) * n_in_maps
            norm_scale = 2. * sqrt(1. / fan_in)
            if activation in (relu, softplus):
                # rectifier-like units use a fixed small standard deviation
                norm_scale = 0.01
            W_val = _asarray(numpy_rng.normal(loc=0, scale=norm_scale, size=param_shape),
                             dtype=floatX)
            W = shared(value=W_val, borrow=True, name='W')

        self.W = W

        # init bias: ones for relu/softplus, zeros for anything else
        if b is None:
            if activation in (relu, softplus):
                b_val = ones((n_out_maps,), dtype=floatX)
            else:
                b_val = zeros((n_out_maps,), dtype=floatX)
            b = shared(b_val, name="b", borrow=True)
        self.b = b

        # 3D convolution; dimshuffle: last 3 dimensions must be (in, h, w)
        n_fr, h, w = video_shape
        n_fr_k, h_k, w_k = kernel_shape
        # NOTE(review): W is created as (out, in, frames, h, w) but is handed
        # to conv3d, without a dimshuffle, under a filters_shape of
        # (out, frames, in, h, w) -- confirm this layout is intended.
        out = conv3d(
                signals=input.dimshuffle([0, 2, 1, 3, 4]),
                filters=self.W,
                signals_shape=(batch_size, n_fr, n_in_maps, h, w),
                filters_shape=(n_out_maps, n_fr_k, n_in_maps, h_k, w_k),
                border_mode=border_mode).dimshuffle([0, 2, 1, 3, 4])

        # broadcast the bias over every axis except the map axis (axis 1)
        out += self.b.dimshuffle('x', 0, 'x', 'x', 'x')

        # Zero-initialised companions of W and b, one per parameter.
        self.delta_W = shared(value=zeros(param_shape, dtype=floatX),
            name='delta_W')
        self.delta_b = shared(value=zeros_like(self.b.get_value(borrow=True),
            dtype=floatX), name='delta_b')

        self.output = activation(out)

        self.params = [self.W, self.b]
        self.delta_params = [self.delta_W, self.delta_b]
Ejemplo n.º 4
0
    def __init__(self, input, n_in_maps, n_out_maps, kernel_shape, video_shape, 
        batch_size, activation, layer_name="Conv", rng=RandomState(1234), 
        borrow=True, W=None, b=None):

        """
        Build a 3D convolution over `input`, add a per-map bias and apply
        `activation`; the result is stored in `self.output`.

        input: symbolic 5D tensor; axes 1 and 2 are swapped before the
            convolution and swapped back afterwards
        n_in_maps: number of input feature maps
        n_out_maps: number of output feature maps (also the bias length)
        kernel_shape: (frames, height, width)
        video_shape: (frames, height, width)
        batch_size: first entry of the signal shape handed to conv3d
        activation: callable applied to the biased convolution output
        layer_name: prefix for the names of the shared variables
        rng: generator used to draw the initial weights; the default
            instance is created once, when the method is defined, and is
            therefore shared by every layer that relies on it
        borrow: forwarded to shared() for both W and b
        W: optional initial filter values (drawn from `rng` when None)
        b: optional initial bias values (constant-filled when None)

        W_shape: (out, in, kern_frames, kern_height, kern_width)
        """

        # Expose every constructor argument as an attribute of the same name.
        # Must stay the first statement so that no extra local is captured.
        self.__dict__.update(locals())
        del self.self

        # init W
        # Identity test: `W != None` compares element-wise when W is an
        # array, and the resulting array cannot be used as an `if` condition.
        if W is None:
            # fan in: filter time x filter height x filter width x input maps
            fan_in = prod(kernel_shape) * n_in_maps
            norm_scale = 2. * sqrt(1. / fan_in)
            if activation in (relu, softplus):
                # rectifier-like units use a fixed small standard deviation
                norm_scale = 0.01
            # tuple() lets callers pass kernel_shape as a list as well
            W_shape = (n_out_maps, n_in_maps) + tuple(kernel_shape)
            W_val = _asarray(rng.normal(loc=0, scale=norm_scale, size=W_shape),
                             dtype=floatX)
        else:
            W_val = W
        self.W = shared(value=W_val, borrow=borrow, name=layer_name + '_W')
        self.params = [self.W]

        # init bias: ones for relu/softplus, zeros for anything else
        if b is not None:
            b_val = b
        elif activation in (relu, softplus):
            b_val = ones((n_out_maps,), dtype=floatX)
        else:
            b_val = zeros((n_out_maps,), dtype=floatX)
        self.b = shared(b_val, name=layer_name + "_b", borrow=borrow)
        self.params.append(self.b)

        # 3D convolution; dimshuffle: last 3 dimensions must be (in, h, w)
        n_fr, h, w = video_shape
        n_fr_k, h_k, w_k = kernel_shape
        # NOTE(review): W is created as (out, in, frames, h, w) but is handed
        # to conv3d, without a dimshuffle, under a filters_shape of
        # (out, frames, in, h, w) -- confirm this layout is intended.
        out = conv3d(
                signals=input.dimshuffle([0, 2, 1, 3, 4]),
                filters=self.W,
                signals_shape=(batch_size, n_fr, n_in_maps, h, w),
                filters_shape=(n_out_maps, n_fr_k, n_in_maps, h_k, w_k),
                border_mode='valid').dimshuffle([0, 2, 1, 3, 4])

        # broadcast the bias over every axis except the map axis (axis 1)
        out += self.b.dimshuffle('x', 0, 'x', 'x', 'x')

        self.output = activation(out)
Ejemplo n.º 5
0
    def __init__(self, input, n_in_maps, n_out_maps, kernel_shape, video_shape, pool_shape,
                 batch_size, layer_name="Conv", rng=RandomState(1234),
                 borrow=True, W=None, b=None):
        """
        Build a 3D convolution, max-pool the result, add a per-map bias and
        apply tanh; the result is stored in `self.output`.

        input: symbolic 5D tensor; axes 1 and 2 are swapped before the
            convolution and swapped back afterwards
        n_in_maps / n_out_maps: number of input / output feature maps
        kernel_shape: (frames, height, width)
        video_shape: (frames, height, width)
        pool_shape: forwarded to max_pool_3d
        batch_size: first entry of the signal shape handed to conv3d
        layer_name: prefix for the names of shared variables created here
        rng: generator used to draw the initial weights
        borrow: forwarded to shared() for variables created here
        W, b: optional ready-made parameters, stored as given when not None

        W_shape: (out, in, kern_frames, kern_height, kern_width)
        """

        # Filters: use the caller's variable, otherwise draw from a normal
        # distribution scaled by the fan-in
        # (kernel frames x kernel height x kernel width x input maps).
        if W is None:
            fan_in = prod(kernel_shape) * n_in_maps
            std = 2. * sqrt(1. / fan_in)
            weight_shape = (n_out_maps, n_in_maps) + kernel_shape
            initial_W = _asarray(rng.normal(loc=0, scale=std, size=weight_shape),
                                 dtype=floatX)
            W = shared(value=initial_W, borrow=borrow, name=layer_name + '_W')
        self.W = W

        # Bias: use the caller's variable, otherwise start from zeros.
        if b is None:
            initial_b = zeros((n_out_maps,), dtype=floatX)
            b = shared(initial_b, name=layer_name + "_b", borrow=borrow)
        self.b = b

        self.params = [self.W, self.b]

        # 3D convolution; dimshuffle: last 3 dimensions must be (in, h, w)
        frames, height, width = video_shape
        k_frames, k_height, k_width = kernel_shape
        swap_maps_and_frames = [0, 2, 1, 3, 4]
        conv_result = conv3d(
            signals=input.dimshuffle(swap_maps_and_frames),
            filters=self.W,
            signals_shape=(batch_size, frames, n_in_maps, height, width),
            filters_shape=(n_out_maps, k_frames, n_in_maps, k_height, k_width),
            border_mode='valid')
        out = conv_result.dimshuffle([0, 2, 1, 3, 4])

        # Pool first, then add the bias broadcast over every axis but axis 1.
        pooled_out = max_pool_3d(out, pool_shape, ignore_border=True)
        pooled_out += self.b.dimshuffle('x', 0, 'x', 'x', 'x')

        self.output = T.tanh(pooled_out)
Ejemplo n.º 6
0
        def __init__(self,input,image_shape, pool_size,sparse_count):
            """
            Average pooling expressed as a 'valid' 3D convolution with a
            constant kernel whose taps are `sparse_count` zeros apart.

            input: symbolic tensor; its last three axes are pooled and all
                leading axes are folded into one batch axis for the
                convolution
            image_shape: 5-entry shape of `input`
            pool_size: number of taps along each of the three pooled axes
            sparse_count: number of zeros inserted between neighbouring taps
            """
            # not implementing max pooling as of now. have to do with average pooling

            # Per-axis 0/1 pattern: a one, `sparse_count` zeros, repeated, and
            # closed by a final one.
            tap = numpy.concatenate(([1], numpy.zeros(sparse_count)))
            period = len(tap)
            x, y, z = [
                numpy.insert(numpy.tile(tap, size - 1), (size - 1) * period, 1)
                for size in (pool_size[0], pool_size[1], pool_size[2])
            ]
            extent = (len(x), len(y), len(z))

            # The outer product marks the active taps; dividing by the number
            # of taps turns the sum into a mean.
            mask = numpy.outer(numpy.outer(x, y), z).reshape(*extent)
            mask = numpy.ones((1, 1) + extent) * mask
            self.pool_mask = mask.astype(theano.config.floatX) / numpy.prod(pool_size)

            # Fold every leading axis into the batch axis and add a
            # single-map axis, giving a 5D signal.
            pooled_axes = input.shape[-3:]
            folded_batch = T.shape_padright(T.prod(input.shape[:-3]), 1)
            shape_5d = T.cast(
                T.join(0, folded_batch, T.as_tensor([1,]), pooled_axes),
                'int32')
            filter_shape = (1, 1) + extent
            input_5d = T.reshape(input, shape_5d, ndim=5)
            image_shape = (image_shape[0] * image_shape[1],
                           1,
                           image_shape[2],
                           image_shape[3],
                           image_shape[4])

            # Signals, filters and both shape descriptions get the same
            # axis-1/axis-2 swap before being handed to conv3d.
            swap = [0, 2, 1, 3, 4]
            convolved = conv3d(
                signals=input_5d.dimshuffle(swap),
                filters=self.pool_mask.transpose(0, 2, 1, 3, 4),
                signals_shape=[image_shape[axis] for axis in swap],
                filters_shape=[filter_shape[axis] for axis in swap],
                border_mode='valid')
            avg_out = convolved.dimshuffle([0, 2, 1, 3, 4])

            # Restore the original leading axes in front of the pooled ones.
            restored_shape = T.join(0, input.shape[:-3], avg_out.shape[-3:])
            avg_out = T.reshape(avg_out, restored_shape, ndim=5)

            # Extent of a 'valid' convolution along the pooled axes.
            self.outputlen = (image_shape[2] - len(x) + 1,
                              image_shape[3] - len(y) + 1,
                              image_shape[4] - len(z) + 1)
            self.output = avg_out
Ejemplo n.º 7
0
    def __init__(self, input, image_shape, pool_size, sparse_count):
        """
        Average pooling expressed as a 'valid' 3D convolution with a constant
        kernel whose taps are `sparse_count` zeros apart.

        input: symbolic tensor; its last three axes are pooled and all
            leading axes are folded into one batch axis for the convolution
        image_shape: 5-entry shape of `input`
        pool_size: number of taps along each of the three pooled axes
        sparse_count: number of zeros inserted between neighbouring taps
        """
        # not implementing max pooling as of now. have to do with average pooling

        def axis_pattern(taps):
            # A one, `sparse_count` zeros, repeated, closed by a final one.
            return numpy.insert(numpy.tile(unit, taps - 1),
                                (taps - 1) * len(unit), 1)

        unit = numpy.concatenate(([1], numpy.zeros(sparse_count)))
        x = axis_pattern(pool_size[0])
        y = axis_pattern(pool_size[1])
        z = axis_pattern(pool_size[2])
        kernel_extent = (len(x), len(y), len(z))

        # The outer product marks the active taps; dividing by the number of
        # taps turns the sum into a mean.
        taps_3d = numpy.outer(numpy.outer(x, y), z).reshape(*kernel_extent)
        taps_5d = numpy.ones((1, 1) + kernel_extent) * taps_3d
        self.pool_mask = taps_5d.astype(
            theano.config.floatX) / numpy.prod(pool_size)

        # Fold every leading axis into the batch axis and add a single-map
        # axis, giving a 5D signal.
        trailing = input.shape[-3:]
        merged_batch = T.shape_padright(T.prod(input.shape[:-3]), 1)
        target_shape = T.cast(
            T.join(0, merged_batch, T.as_tensor([1, ]), trailing), 'int32')
        filter_shape = (1, 1) + kernel_extent
        input_5d = T.reshape(input, target_shape, ndim=5)
        image_shape = (image_shape[0] * image_shape[1], 1, image_shape[2],
                       image_shape[3], image_shape[4])

        # Signals, filters and both shape descriptions get the same
        # axis-1/axis-2 swap before being handed to conv3d.
        order = [0, 2, 1, 3, 4]
        avg_out = conv3d(
            signals=input_5d.dimshuffle(order),
            filters=self.pool_mask.transpose(0, 2, 1, 3, 4),
            signals_shape=[image_shape[k] for k in order],
            filters_shape=[filter_shape[k] for k in order],
            border_mode='valid').dimshuffle([0, 2, 1, 3, 4])

        # Restore the original leading axes in front of the pooled ones.
        final_shape = T.join(0, input.shape[:-3], avg_out.shape[-3:])
        avg_out = T.reshape(avg_out, final_shape, ndim=5)

        # Extent of a 'valid' convolution along the pooled axes.
        self.outputlen = tuple(
            image_shape[axis] - length + 1
            for axis, length in zip((2, 3, 4), kernel_extent))
        self.output = avg_out
    def __init__(self,
                 rng,
                 input,
                 signal_shape,
                 filter_shape,
                 poolsize=(2, 2, 2),
                 stride=None,
                 if_pool=False,
                 if_hidden_pool=False,
                 act=None,
                 share_with=None,
                 tied=None,
                 border_mode='valid'):
        """
        Build a conv3d2d convolution layer with optional max pooling, a bias
        on axis 2 and a selectable activation.

        rng: generator for the uniform weight initialisation (only used when
            neither `share_with` nor `tied` is given)
        input: symbolic signal handed to conv3d2d.conv3d
        signal_shape: forwarded as `signals_shape`
        filter_shape: forwarded as `filters_shape`; also the shape of W
        poolsize: pooling factors; also divides fan_out in the init bound
        stride: forwarded as `st` to max_pool_2d (`if_hidden_pool` only)
        if_pool: apply 3D max pooling with `poolsize`
        if_hidden_pool: apply 2D max pooling with `poolsize[:2]`; only
            consulted when `if_pool` is false
        act: 'tanh', 'sigmoid', 'relu' or 'softplus'; any other value leaves
            the output linear
        share_with: layer whose W, b, W_delta and b_delta are reused as-is
        tied: layer whose W and W_delta are reused with their first two axes
            swapped, and whose b and b_delta are reused as-is
        border_mode: forwarded to conv3d2d.conv3d
        """
        self.input = input

        if share_with:
            self.W = share_with.W
            self.b = share_with.b

            self.W_delta = share_with.W_delta
            self.b_delta = share_with.b_delta

        elif tied:
            # NOTE(review): a 4-axis dimshuffle only works on 4D variables,
            # while the rest of this method treats the filters as 5D --
            # confirm how tied weights are meant to be laid out.
            self.W = tied.W.dimshuffle(1, 0, 2, 3)
            self.b = tied.b

            self.W_delta = tied.W_delta.dimshuffle(1, 0, 2, 3)
            self.b_delta = tied.b_delta

        else:
            # Uniform bound sqrt(6 / (fan_in + fan_out)); fan_out is reduced
            # by the pooling factor.
            fan_in = np.prod(filter_shape[1:])
            poolsize_size = np.prod(poolsize) if poolsize else 1
            fan_out = (filter_shape[0] * np.prod(filter_shape[2:]) /
                       poolsize_size)
            W_bound = np.sqrt(6. / (fan_in + fan_out))
            self.W = theano.shared(np.asarray(rng.uniform(low=-W_bound,
                                                          high=W_bound,
                                                          size=filter_shape),
                                              dtype=theano.config.floatX),
                                   borrow=True)
            b_values = np.zeros((filter_shape[0], ),
                                dtype=theano.config.floatX)
            self.b = theano.shared(value=b_values, borrow=True)

            self.W_delta = theano.shared(np.zeros(filter_shape,
                                                  dtype=theano.config.floatX),
                                         borrow=True)

            # Give b_delta its own buffer: handing the same array to two
            # borrowing shared variables lets them alias one block of memory.
            self.b_delta = theano.shared(value=np.zeros_like(b_values),
                                         borrow=True)

        # convolution
        conv_out = conv3d2d.conv3d(signals=input,
                                   filters=self.W,
                                   signals_shape=signal_shape,
                                   filters_shape=filter_shape,
                                   border_mode=border_mode)

        # In every branch the bias is broadcast over all axes except axis 2.
        if if_pool:
            # maxpool3d works on the last 3 dimensions: swap axes 1 and 2,
            # pool, then swap back
            conv_out = conv_out.dimshuffle(0, 2, 1, 3, 4)
            pooled_out = maxpool3d.max_pool_3d(input=conv_out,
                                               ds=poolsize,
                                               ignore_border=True)
            tmp_out = pooled_out.dimshuffle(0, 2, 1, 3, 4)
            tmp = tmp_out + self.b.dimshuffle('x', 'x', 0, 'x', 'x')
        elif if_hidden_pool:
            pooled_out = downsample.max_pool_2d(input=conv_out,
                                                ds=poolsize[:2],
                                                st=stride,
                                                ignore_border=True)
            tmp = pooled_out + self.b.dimshuffle('x', 'x', 0, 'x', 'x')
        else:
            tmp = conv_out + self.b.dimshuffle('x', 'x', 0, 'x', 'x')

        if act == 'tanh':
            self.output = T.tanh(tmp)
        elif act == 'sigmoid':
            self.output = nnet.sigmoid(tmp)
        elif act == 'relu':
            # 0.5 * (tmp + |tmp|) equals max(tmp, 0); a 1e-9 offset is added
            self.output = 0.5 * (tmp + abs(tmp)) + 1e-9
        elif act == 'softplus':
            self.output = nnet.softplus(tmp)
        else:
            self.output = tmp

        # Compiled function mapping the layer input to its output.
        self.get_activation = theano.function([self.input],
                                              self.output,
                                              updates=None,
                                              name='get hidden activation')

        # store parameters of this layer
        self.params = [self.W, self.b]
        self.deltas = [self.W_delta, self.b_delta]
    def __init__(self, rng, input, signal_shape, filter_shape, poolsize=(2, 2, 2), stride=None, if_pool=False, if_hidden_pool=False,
                 act=None,
                 share_with=None,
                 tied=None,
                 border_mode='valid'):
        """
        Build a conv3d2d convolution layer with optional max pooling, a bias
        on axis 2 and a selectable activation.

        rng: generator for the uniform weight initialisation (only used when
            neither `share_with` nor `tied` is given)
        input: symbolic signal handed to conv3d2d.conv3d
        signal_shape: forwarded as `signals_shape`
        filter_shape: forwarded as `filters_shape`; also the shape of W
        poolsize: pooling factors; also divides fan_out in the init bound
        stride: forwarded as `st` to max_pool_2d (`if_hidden_pool` only)
        if_pool: apply 3D max pooling with `poolsize`
        if_hidden_pool: apply 2D max pooling with `poolsize[:2]`; only
            consulted when `if_pool` is false
        act: 'tanh', 'sigmoid', 'relu' or 'softplus'; any other value leaves
            the output linear
        share_with: layer whose W, b, W_delta and b_delta are reused as-is
        tied: layer whose W and W_delta are reused with their first two axes
            swapped, and whose b and b_delta are reused as-is
        border_mode: forwarded to conv3d2d.conv3d
        """
        self.input = input

        if share_with:
            self.W = share_with.W
            self.b = share_with.b

            self.W_delta = share_with.W_delta
            self.b_delta = share_with.b_delta

        elif tied:
            # NOTE(review): a 4-axis dimshuffle only works on 4D variables,
            # while the rest of this method treats the filters as 5D --
            # confirm how tied weights are meant to be laid out.
            self.W = tied.W.dimshuffle(1, 0, 2, 3)
            self.b = tied.b

            self.W_delta = tied.W_delta.dimshuffle(1, 0, 2, 3)
            self.b_delta = tied.b_delta

        else:
            # Uniform bound sqrt(6 / (fan_in + fan_out)); fan_out is reduced
            # by the pooling factor.
            fan_in = np.prod(filter_shape[1:])
            poolsize_size = np.prod(poolsize) if poolsize else 1
            fan_out = (filter_shape[0] * np.prod(filter_shape[2:]) / poolsize_size)
            W_bound = np.sqrt(6. / (fan_in + fan_out))
            self.W = theano.shared(
                np.asarray(
                    rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
                    dtype=theano.config.floatX
                ),
                borrow=True
            )
            b_values = np.zeros((filter_shape[0],), dtype=theano.config.floatX)
            self.b = theano.shared(value=b_values, borrow=True)

            self.W_delta = theano.shared(
                np.zeros(filter_shape, dtype=theano.config.floatX),
                borrow=True
            )

            # Give b_delta its own buffer: handing the same array to two
            # borrowing shared variables lets them alias one block of memory.
            self.b_delta = theano.shared(
                value=np.zeros_like(b_values),
                borrow=True
            )

        # convolution
        conv_out = conv3d2d.conv3d(
            signals=input,
            filters=self.W,
            signals_shape=signal_shape,
            filters_shape=filter_shape,
            border_mode=border_mode)

        # In every branch the bias is broadcast over all axes except axis 2.
        bias = self.b.dimshuffle('x', 'x', 0, 'x', 'x')
        if if_pool:
            # maxpool3d works on the last 3 dimensions: swap axes 1 and 2,
            # pool, then swap back
            conv_out = conv_out.dimshuffle(0, 2, 1, 3, 4)
            pooled_out = maxpool3d.max_pool_3d(
                input=conv_out,
                ds=poolsize,
                ignore_border=True)
            tmp = pooled_out.dimshuffle(0, 2, 1, 3, 4) + bias
        elif if_hidden_pool:
            pooled_out = downsample.max_pool_2d(
                input=conv_out,
                ds=poolsize[:2],
                st=stride,
                ignore_border=True)
            tmp = pooled_out + bias
        else:
            tmp = conv_out + bias

        if act == 'tanh':
            self.output = T.tanh(tmp)
        elif act == 'sigmoid':
            self.output = nnet.sigmoid(tmp)
        elif act == 'relu':
            # 0.5 * (tmp + |tmp|) equals max(tmp, 0); a 1e-9 offset is added
            self.output = 0.5 * (tmp + abs(tmp)) + 1e-9
        elif act == 'softplus':
            self.output = nnet.softplus(tmp)
        else:
            self.output = tmp

        # Compiled function mapping the layer input to its output.
        self.get_activation = theano.function(
            [self.input],
            self.output,
            updates=None,
            name='get hidden activation')

        # store parameters of this layer
        self.params = [self.W, self.b]
        self.deltas = [self.W_delta, self.b_delta]
Ejemplo n.º 10
0
    def __init__(self,
                 rng,
                 input,
                 filter_shape,
                 image_shape,
                 W_init,
                 b_init,
                 sparse_count,
                 softmax=0):
        """
        Build a 3D convolution layer whose kernel taps are spaced
        `sparse_count` zeros apart, followed by a softplus.

        rng: generator used for the uniform weight initialisation
        input: symbolic 5D tensor; axes 1 and 2 are swapped before the
            convolution and swapped back afterwards
        filter_shape: (out maps, in maps, k0, k1, k2) of the dense kernel;
            the stored kernel is enlarged to (1 + sparse_count) * k -
            sparse_count along each of the last three axes
        image_shape: 5-entry shape of `input`; entry 1 must equal
            filter_shape[1]
        W_init: initial filter values, or None to draw them from `rng`
        b_init: initial bias values, or None for zeros
        sparse_count: number of zeros inserted between neighbouring taps
        softmax: when truthy, also build `p_y_given_x` and `y_pred`
        """
        assert image_shape[1] == filter_shape[1]
        self.input = input

        # Uniform bound derived from the dense (un-dilated) filter shape.
        fan_in = numpy.prod(filter_shape[1:])
        fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]))
        W_bound = numpy.sqrt(6. / (fan_in + fan_out))

        def axis_pattern(taps):
            # A one, `sparse_count` zeros, repeated, closed by a final one.
            return numpy.insert(numpy.tile(oneZeros, taps - 1),
                                (taps - 1) * (len(oneZeros)), 1)

        oneZeros = numpy.concatenate(([1], numpy.zeros(sparse_count)))
        x = axis_pattern(filter_shape[2])
        y = axis_pattern(filter_shape[3])
        z = axis_pattern(filter_shape[4])
        # The outer product of the three patterns marks the active taps.
        mask = numpy.outer(numpy.outer(x, y),
                           z).reshape(len(x), len(y), len(z))

        # From here on filter_shape describes the enlarged kernel.
        filter_shape = (filter_shape[0], filter_shape[1],
                        (1 + sparse_count) * filter_shape[2] - sparse_count,
                        (1 + sparse_count) * filter_shape[3] - sparse_count,
                        (1 + sparse_count) * filter_shape[4] - sparse_count)
        self.Wmask = (numpy.ones(filter_shape) * mask).astype(
            theano.config.floatX)

        # Identity tests: `array != None` compares element-wise and the
        # result cannot be used as an `if` condition.
        if W_init is None:
            W_values = numpy.asarray(
                rng.uniform(low=-W_bound, high=W_bound, size=filter_shape) *
                self.Wmask,
                dtype=theano.config.floatX)
        else:
            W_values = W_init
        self.W = theano.shared(value=W_values, borrow=True)

        if b_init is None:
            b_values = numpy.zeros((filter_shape[0], ),
                                   dtype=theano.config.floatX)
        else:
            b_values = b_init
        self.b = theano.shared(value=b_values, borrow=True)
        self.bmask = numpy.ones((filter_shape[0], ),
                                dtype=theano.config.floatX)

        # Signals, filters and both shape descriptions get the same
        # axis-1/axis-2 swap before being handed to conv3d.
        conv_out = conv3d(
            signals=input.dimshuffle([0, 2, 1, 3, 4]),
            filters=self.W.dimshuffle([0, 2, 1, 3, 4]),
            signals_shape=[image_shape[i] for i in [0, 2, 1, 3, 4]],
            filters_shape=[filter_shape[i] for i in [0, 2, 1, 3, 4]],
            border_mode='valid').dimshuffle([0, 2, 1, 3, 4])
        conv_out += self.b.dimshuffle('x', 0, 'x', 'x', 'x')

        # Extent of a 'valid' convolution along the last three axes.
        self.outputlen = (image_shape[2] - filter_shape[2] + 1,
                          image_shape[3] - filter_shape[3] + 1,
                          image_shape[4] - filter_shape[4] + 1)
        self.output = T.nnet.softplus(conv_out)
        self.params = [self.W, self.b]
        self.masks = [self.Wmask, self.bmask]
        self.num_points = T.prod(self.outputlen)

        # initial shape = 1,3,img_shape
        if (softmax):
            # The reshape drops the leading axis, so it only works when that
            # axis has length 1. Rows are output points, columns are maps;
            # the softmax is taken on the pre-softplus values.
            out = conv_out.reshape([conv_out.shape[1],
                                    self.num_points]).dimshuffle(1, 0)
            self.p_y_given_x = T.nnet.softmax(out)
            self.y_pred = T.argmax(self.p_y_given_x, axis=1)
Ejemplo n.º 11
0
    def __init__(self,
                 input,
                 n_in_maps,
                 n_out_maps,
                 kernel_shape,
                 video_shape,
                 batch_size,
                 numpy_rng,
                 activation,
                 W=None,
                 b=None,
                 border_mode='valid'):
        """
        Build a 3D convolution over `input`, add a per-map bias and apply
        `activation`; also allocate zero-filled `delta_W` / `delta_b`.

        input: symbolic 5D tensor; axes 1 and 2 are swapped before the
            convolution and swapped back afterwards
        n_in_maps: number of input feature maps
        n_out_maps: number of output feature maps (also the bias length)
        kernel_shape: (frames, height, width)
        video_shape: (frames, height, width)
        batch_size: first entry of the signal shape handed to conv3d
        numpy_rng: generator used to draw the initial weights
        activation: callable applied to the biased convolution output
        W: optional shared variable holding the filters; created when None
        b: optional shared variable holding the bias; created when None
        border_mode: forwarded to conv3d

        W_shape: (out, in, kern_frames, kern_height, kern_width)
        """

        # Expose every constructor argument as an attribute of the same name.
        # Must stay the first statement so that no extra local is captured.
        self.__dict__.update(locals())
        del self.self
        self.activation = activation

        # list() lets callers pass kernel_shape as a tuple as well as a list
        weight_shape = [n_out_maps, n_in_maps] + list(kernel_shape)
        rectifier_like = activation in (relu, softplus)

        # init W
        if W is None:
            # fan in: filter time x filter height x filter width x input maps
            fan_in = prod(kernel_shape) * n_in_maps
            norm_scale = 2. * sqrt(1. / fan_in)
            if rectifier_like:
                # rectifier-like units use a fixed small standard deviation
                norm_scale = 0.01
            W_val = _asarray(numpy_rng.normal(loc=0,
                                              scale=norm_scale,
                                              size=weight_shape),
                             dtype=floatX)
            W = shared(value=W_val, borrow=True, name='W')

        self.W = W

        # init bias: ones for relu/softplus, zeros for anything else
        if b is None:
            fill = ones if rectifier_like else zeros
            b_val = fill((n_out_maps, ), dtype=floatX)
            b = shared(b_val, name="b", borrow=True)
        self.b = b

        # 3D convolution; dimshuffle: last 3 dimensions must be (in, h, w)
        n_fr, h, w = video_shape
        n_fr_k, h_k, w_k = kernel_shape
        # NOTE(review): W is created as (out, in, frames, h, w) but is handed
        # to conv3d, without a dimshuffle, under a filters_shape of
        # (out, frames, in, h, w) -- confirm this layout is intended.
        out = conv3d(signals=input.dimshuffle([0, 2, 1, 3, 4]),
                     filters=self.W,
                     signals_shape=(batch_size, n_fr, n_in_maps, h, w),
                     filters_shape=(n_out_maps, n_fr_k, n_in_maps, h_k, w_k),
                     border_mode=border_mode).dimshuffle([0, 2, 1, 3, 4])

        # broadcast the bias over every axis except the map axis (axis 1)
        out += self.b.dimshuffle('x', 0, 'x', 'x', 'x')

        # Zero-initialised companions of W and b, one per parameter.
        self.delta_W = shared(value=zeros(weight_shape, dtype=floatX),
                              name='delta_W')
        self.delta_b = shared(value=zeros_like(self.b.get_value(borrow=True),
                                               dtype=floatX),
                              name='delta_b')

        self.output = activation(out)

        self.params = [self.W, self.b]
        self.delta_params = [self.delta_W, self.delta_b]