def __init__(self, input, n_in_maps, n_out_maps, kernel_shape, video_shape,
             batch_size, activation, layer_name="Conv",
             rng=RandomState(1234), borrow=True, W=None, b=None):
    """Build a 3D convolution layer followed by an element-wise activation.

    input: symbolic 5-D tensor; it is dimshuffled with [0, 2, 1, 3, 4]
        and declared to conv3d as (batch_size, frames, n_in_maps, h, w).
    n_in_maps / n_out_maps: number of input / output feature maps.
    kernel_shape: (frames, height, width); tuple or list.
    video_shape: (frames, height, width)
    batch_size: first entry of the signal shape handed to conv3d.
    activation: callable applied to the biased convolution output.
    layer_name: prefix for the names of the shared variables.
    rng: source of the normal samples used to initialise W.
    borrow: forwarded to `shared` for both W and b.
    W, b: optional initial values; when None they are generated here.
    W_shape: (out, in, kern_frames, kern_height, kern_width)

    Sets self.W, self.b, self.params and self.output, plus one attribute
    per constructor argument.
    """
    # Expose every constructor argument as an attribute of the same name.
    # Must stay the first statement so no other local leaks into the object.
    self.__dict__.update(locals())
    del self.self

    # init W
    # `is not None` instead of `!= None`: comparing an ndarray with None is
    # element-wise, so the truth test of the result raises.
    if W is not None:
        W_val = W
    else:
        # fan in: filter time x filter height x filter width x input maps
        fan_in = prod(kernel_shape) * n_in_maps
        norm_scale = 2. * sqrt(1. / fan_in)
        if activation in (relu, softplus):
            norm_scale = 0.01
        # tuple() lets callers pass kernel_shape as a list as well.
        W_shape = (n_out_maps, n_in_maps) + tuple(kernel_shape)
        W_val = _asarray(
            rng.normal(loc=0, scale=norm_scale, size=W_shape),
            dtype=floatX)
    self.W = shared(value=W_val, borrow=borrow, name=layer_name + '_W')
    self.params = [self.W]

    # init bias: ones for relu/softplus, zeros otherwise
    if b is not None:
        b_val = b
    elif activation in (relu, softplus):
        b_val = ones((n_out_maps,), dtype=floatX)
    else:
        b_val = zeros((n_out_maps,), dtype=floatX)
    self.b = shared(b_val, name=layer_name + "_b", borrow=borrow)
    self.params.append(self.b)

    # 3D convolution; dimshuffle: last 3 dimensions must be (in, h, w)
    n_fr, h, w = video_shape
    n_fr_k, h_k, w_k = kernel_shape
    # NOTE(review): W is allocated as (out, in, frames, h, w) but is passed
    # with filters_shape (out, frames, in, h, w) without a dimshuffle --
    # confirm this layout is intended.
    out = conv3d(
        signals=input.dimshuffle([0, 2, 1, 3, 4]),
        filters=self.W,
        signals_shape=(batch_size, n_fr, n_in_maps, h, w),
        filters_shape=(n_out_maps, n_fr_k, n_in_maps, h_k, w_k),
        border_mode='valid').dimshuffle([0, 2, 1, 3, 4])

    # One bias per output map, broadcast over every other axis.
    out += self.b.dimshuffle('x', 0, 'x', 'x', 'x')

    self.output = activation(out)
def __init__(self, rng, input, filter_shape, image_shape, W_init, b_init,
             sparse_count, softmax=0):
    """Build a sparse (zero-interleaved) 3D convolution layer with softplus.

    rng: random generator providing `uniform`, used when W_init is None.
    input: symbolic 5-D tensor; axes 1 and 2 are swapped before conv3d.
    filter_shape: 5 entries (out maps, in maps, k0, k1, k2) of the dense
        kernel; it is expanded here to the zero-interleaved size.
    image_shape: 5 entries describing `input`; entry 1 must equal
        filter_shape[1].
    W_init, b_init: initial values for W and b, or None to create them.
    sparse_count: number of zeros inserted between neighbouring kernel taps
        along each of the three kernel axes.
    softmax: when truthy, also build self.p_y_given_x and self.y_pred.

    Sets self.W, self.b, self.Wmask, self.bmask, self.params, self.masks,
    self.outputlen, self.num_points and self.output.
    """
    assert image_shape[1] == filter_shape[1], \
        "image_shape[1] must equal filter_shape[1]"
    self.input = input

    # Uniform bound computed from the dense (un-expanded) filter shape.
    fan_in = numpy.prod(filter_shape[1:])
    fan_out = filter_shape[0] * numpy.prod(filter_shape[2:])
    W_bound = numpy.sqrt(6. / (fan_in + fan_out))

    # A single tap followed by `sparse_count` zeros.
    oneZeros = numpy.concatenate(([1], numpy.zeros(sparse_count)))

    def axis_pattern(size):
        # `size` ones separated by `sparse_count` zeros:
        # (size - 1) repetitions of [1, 0, ..., 0] plus a closing 1.
        return numpy.insert(numpy.tile(oneZeros, size - 1),
                            (size - 1) * len(oneZeros), 1)

    x = axis_pattern(filter_shape[2])
    y = axis_pattern(filter_shape[3])
    z = axis_pattern(filter_shape[4])
    # Separable 3-D mask: 1 where all three axis patterns are 1.
    mask = numpy.outer(numpy.outer(x, y), z).reshape(len(x), len(y), len(z))

    # From here on filter_shape is the expanded shape; each kernel axis has
    # the same length as the matching axis pattern.
    filter_shape = (filter_shape[0],
                    filter_shape[1],
                    (1 + sparse_count) * filter_shape[2] - sparse_count,
                    (1 + sparse_count) * filter_shape[3] - sparse_count,
                    (1 + sparse_count) * filter_shape[4] - sparse_count)
    self.Wmask = (numpy.ones(filter_shape) * mask).astype(
        theano.config.floatX)

    # `is not None` instead of `!= None`: comparing an ndarray with None is
    # element-wise, so the truth test of the result raises.
    if W_init is not None:
        # A supplied W_init is used as given; the mask is not applied to it.
        W_values = W_init
    else:
        W_values = numpy.asarray(
            rng.uniform(low=-W_bound, high=W_bound, size=filter_shape)
            * self.Wmask,
            dtype=theano.config.floatX)
    self.W = theano.shared(value=W_values, borrow=True)

    if b_init is not None:
        b_values = b_init
    else:
        b_values = numpy.zeros((filter_shape[0],),
                               dtype=theano.config.floatX)
    self.b = theano.shared(value=b_values, borrow=True)
    self.bmask = numpy.ones((filter_shape[0],), dtype=theano.config.floatX)

    # conv3d takes axes 1 and 2 swapped relative to this layer's layout, so
    # tensors and their static shapes are permuted in and the result is
    # permuted back.
    conv_out = conv3d(
        signals=input.dimshuffle([0, 2, 1, 3, 4]),
        filters=self.W.dimshuffle([0, 2, 1, 3, 4]),
        signals_shape=[image_shape[i] for i in [0, 2, 1, 3, 4]],
        filters_shape=[filter_shape[i] for i in [0, 2, 1, 3, 4]],
        border_mode='valid').dimshuffle([0, 2, 1, 3, 4])

    # One bias per output map, broadcast over every other axis.
    conv_out += self.b.dimshuffle('x', 0, 'x', 'x', 'x')

    # Extent of a 'valid' convolution along the three convolved axes.
    self.outputlen = (image_shape[2] - filter_shape[2] + 1,
                      image_shape[3] - filter_shape[3] + 1,
                      image_shape[4] - filter_shape[4] + 1)
    self.output = T.nnet.softplus(conv_out)
    self.params = [self.W, self.b]
    self.masks = [self.Wmask, self.bmask]
    self.num_points = T.prod(self.outputlen)

    # initial shape = 1,3,img_shape
    # NOTE(review): the reshape below drops axis 0, so it presumably
    # assumes a batch size of 1 -- confirm against callers.
    if softmax:
        # One row per output position, one column per output map.
        out = conv_out.reshape(
            [conv_out.shape[1], self.num_points]).dimshuffle(1, 0)
        self.p_y_given_x = T.nnet.softmax(out)
        self.y_pred = T.argmax(self.p_y_given_x, axis=1)
def __init__(self, input, n_in_maps, n_out_maps, kernel_shape, video_shape,
             batch_size, numpy_rng, activation, W=None, b=None,
             border_mode='valid'):
    """Build a 3D convolution layer with per-parameter delta buffers.

    input: symbolic 5-D tensor; it is dimshuffled with [0, 2, 1, 3, 4]
        and declared to conv3d as (batch_size, frames, n_in_maps, h, w).
    n_in_maps / n_out_maps: number of input / output feature maps.
    kernel_shape: (frames, height, width); tuple or list.
    video_shape: (frames, height, width)
    batch_size: first entry of the signal shape handed to conv3d.
    numpy_rng: source of the normal samples used to initialise W.
    activation: callable applied to the biased convolution output.
    W, b: optional existing parameters, used directly as self.W / self.b
        (b must provide get_value); created here when None.
    border_mode: forwarded to conv3d.
    W_shape: (out, in, kern_frames, kern_height, kern_width)

    Sets self.W, self.b, self.delta_W, self.delta_b, self.params,
    self.delta_params and self.output, plus one attribute per constructor
    argument.
    """
    # Expose every constructor argument as an attribute of the same name.
    # Must stay the first statement so no other local leaks into the object.
    self.__dict__.update(locals())
    del self.self
    self.activation = activation

    # The documented kernel_shape is a tuple, which cannot be concatenated
    # to a list directly; normalise it once for W and delta_W.
    W_shape = [n_out_maps, n_in_maps] + list(kernel_shape)

    # init W
    if W is None:
        # fan in: filter time x filter height x filter width x input maps
        fan_in = prod(kernel_shape) * n_in_maps
        norm_scale = 2. * sqrt(1. / fan_in)
        if activation in (relu, softplus):
            norm_scale = 0.01
        W_val = _asarray(
            numpy_rng.normal(loc=0, scale=norm_scale, size=W_shape),
            dtype=floatX)
        W = shared(value=W_val, borrow=True, name='W')
    self.W = W

    # init bias: ones for relu/softplus, zeros otherwise
    if b is None:
        if activation in (relu, softplus):
            b_val = ones((n_out_maps,), dtype=floatX)
        else:
            b_val = zeros((n_out_maps,), dtype=floatX)
        b = shared(b_val, name="b", borrow=True)
    self.b = b

    # 3D convolution; dimshuffle: last 3 dimensions must be (in, h, w)
    n_fr, h, w = video_shape
    n_fr_k, h_k, w_k = kernel_shape
    # NOTE(review): W is allocated as (out, in, frames, h, w) but is passed
    # with filters_shape (out, frames, in, h, w) without a dimshuffle --
    # confirm this layout is intended.
    out = conv3d(
        signals=input.dimshuffle([0, 2, 1, 3, 4]),
        filters=self.W,
        signals_shape=(batch_size, n_fr, n_in_maps, h, w),
        filters_shape=(n_out_maps, n_fr_k, n_in_maps, h_k, w_k),
        border_mode=border_mode).dimshuffle([0, 2, 1, 3, 4])

    # One bias per output map, broadcast over every other axis.
    out += self.b.dimshuffle('x', 0, 'x', 'x', 'x')

    # Zero-initialised buffers with the same shapes as W and b.
    self.delta_W = shared(value=zeros(W_shape, dtype=floatX),
                          name='delta_W')
    self.delta_b = shared(
        value=zeros_like(self.b.get_value(borrow=True), dtype=floatX),
        name='delta_b')

    self.output = activation(out)

    self.params = [self.W, self.b]
    self.delta_params = [self.delta_W, self.delta_b]
def __init__(self, input, n_in_maps, n_out_maps, kernel_shape, video_shape,
             batch_size, activation, layer_name="Conv",
             rng=RandomState(1234), borrow=True, W=None, b=None):
    """Create the parameters and symbolic output of a 3D convolution layer.

    input: symbolic 5-D tensor; it is dimshuffled with [0, 2, 1, 3, 4]
        and declared to conv3d as (batch_size, frames, n_in_maps, h, w).
    n_in_maps / n_out_maps: number of input / output feature maps.
    kernel_shape: (frames, height, width); tuple or list.
    video_shape: (frames, height, width)
    batch_size: first entry of the signal shape handed to conv3d.
    activation: callable applied to the biased convolution output.
    layer_name: prefix for the names of the shared variables.
    rng: source of the normal samples used to initialise W.
    borrow: forwarded to `shared` for both W and b.
    W, b: optional initial values; when None they are generated here.
    W_shape: (out, in, kern_frames, kern_height, kern_width)

    Sets self.W, self.b, self.params and self.output, plus one attribute
    per constructor argument.
    """
    # Mirror all constructor arguments onto the instance. This has to run
    # before any other local is bound, otherwise that local is copied too.
    self.__dict__.update(locals())
    del self.self

    # init W
    # Identity test rather than `!= None`: with an ndarray the comparison
    # is element-wise and cannot be used as a condition.
    if W is not None:
        W_val = W
    else:
        # fan in: filter time x filter height x filter width x input maps
        fan_in = prod(kernel_shape) * n_in_maps
        norm_scale = 2. * sqrt(1. / fan_in)
        if activation in (relu, softplus):
            norm_scale = 0.01
        # tuple() accepts a list kernel_shape in addition to a tuple.
        W_shape = (n_out_maps, n_in_maps) + tuple(kernel_shape)
        W_val = _asarray(
            rng.normal(loc=0, scale=norm_scale, size=W_shape),
            dtype=floatX)
    self.W = shared(value=W_val, borrow=borrow, name=layer_name + '_W')
    self.params = [self.W]

    # init bias: ones for relu/softplus, zeros otherwise
    if b is not None:
        b_val = b
    elif activation in (relu, softplus):
        b_val = ones((n_out_maps,), dtype=floatX)
    else:
        b_val = zeros((n_out_maps,), dtype=floatX)
    self.b = shared(b_val, name=layer_name + "_b", borrow=borrow)
    self.params.append(self.b)

    # 3D convolution; dimshuffle: last 3 dimensions must be (in, h, w)
    n_fr, h, w = video_shape
    n_fr_k, h_k, w_k = kernel_shape
    # NOTE(review): W is allocated as (out, in, frames, h, w) but is passed
    # with filters_shape (out, frames, in, h, w) without a dimshuffle --
    # confirm this layout is intended.
    out = conv3d(
        signals=input.dimshuffle([0, 2, 1, 3, 4]),
        filters=self.W,
        signals_shape=(batch_size, n_fr, n_in_maps, h, w),
        filters_shape=(n_out_maps, n_fr_k, n_in_maps, h_k, w_k),
        border_mode='valid').dimshuffle([0, 2, 1, 3, 4])

    # One bias per output map, broadcast over every other axis.
    out += self.b.dimshuffle('x', 0, 'x', 'x', 'x')

    self.output = activation(out)
def __init__(self, input, n_in_maps, n_out_maps, kernel_shape, video_shape,
             pool_shape, batch_size, layer_name="Conv",
             rng=RandomState(1234), borrow=True, W=None, b=None):
    """Build a 3D convolution + 3D max-pooling layer with tanh output.

    input: symbolic 5-D tensor; it is dimshuffled with [0, 2, 1, 3, 4]
        and declared to conv3d as (batch_size, frames, n_in_maps, h, w).
    n_in_maps / n_out_maps: number of input / output feature maps.
    kernel_shape: (frames, height, width), as a tuple.
    video_shape: (frames, height, width)
    pool_shape: forwarded to max_pool_3d.
    batch_size: first entry of the signal shape handed to conv3d.
    layer_name: prefix for the names of the shared variables created here.
    rng: source of the normal samples used to initialise W.
    borrow: forwarded to `shared` for parameters created here.
    W, b: optional existing parameters, stored as-is when given.
    W_shape: (out, in, kern_frames, kern_height, kern_width)

    Sets self.W, self.b, self.params and self.output.
    """
    # Weights: only allocate a shared variable when none was handed in.
    if W is None:
        # fan in: filter time x filter height x filter width x input maps
        receptive_field = prod(kernel_shape) * n_in_maps
        sigma = 2. * sqrt(1. / receptive_field)
        weight_dims = (n_out_maps, n_in_maps) + kernel_shape
        initial_W = _asarray(
            rng.normal(loc=0, scale=sigma, size=weight_dims),
            dtype=floatX)
        self.W = shared(value=initial_W, borrow=borrow,
                        name=layer_name + '_W')
    else:
        self.W = W

    # Bias: zero-initialised, one entry per output map.
    if b is None:
        initial_b = zeros((n_out_maps,), dtype=floatX)
        self.b = shared(initial_b, name=layer_name + "_b", borrow=borrow)
    else:
        self.b = b

    self.params = [self.W, self.b]

    # 3D convolution; dimshuffle: last 3 dimensions must be (in, h, w)
    frames, height, width = video_shape
    k_frames, k_height, k_width = kernel_shape
    swap_maps_and_frames = [0, 2, 1, 3, 4]
    signals = input.dimshuffle(swap_maps_and_frames)
    convolved = conv3d(
        signals=signals,
        filters=self.W,
        signals_shape=(batch_size, frames, n_in_maps, height, width),
        filters_shape=(n_out_maps, k_frames, n_in_maps, k_height, k_width),
        border_mode='valid')
    # Swap axes 1 and 2 back before pooling.
    convolved = convolved.dimshuffle([0, 2, 1, 3, 4])

    pooled = max_pool_3d(convolved, pool_shape, ignore_border=True)

    # The bias is added after pooling, broadcast along every axis but 1.
    pooled += self.b.dimshuffle('x', 0, 'x', 'x', 'x')

    self.output = T.tanh(pooled)
def __init__(self, input, image_shape, pool_size, sparse_count):
    """Build a sparse average-pooling layer expressed as a 3D convolution.

    input: symbolic tensor whose last three axes are pooled; all leading
        axes are folded into one before the convolution and restored after.
    image_shape: 5 entries describing `input`; the first two are multiplied
        together to form the folded leading size.
    pool_size: three pooling extents, one per pooled axis.
    sparse_count: number of zeros placed between neighbouring pooling taps.

    Sets self.pool_mask, self.outputlen and self.output.
    """
    # not implementing max pooling as of now. have to do with average pooling
    unit = numpy.concatenate(([1], numpy.zeros(sparse_count)))

    # Per axis: (size - 1) repetitions of [1, 0, ..., 0] plus a closing 1.
    taps = []
    for axis in range(3):
        repeats = pool_size[axis] - 1
        taps.append(
            numpy.insert(numpy.tile(unit, repeats), repeats * len(unit), 1))
    tap_x, tap_y, tap_z = taps
    extent = (len(tap_x), len(tap_y), len(tap_z))

    # Separable 3-D mask lifted to 5-D with two leading singleton axes.
    mask_3d = numpy.outer(numpy.outer(tap_x, tap_y), tap_z).reshape(*extent)
    mask_5d = numpy.ones((1, 1) + extent) * mask_3d
    # Dividing by the number of taps turns the summing kernel into a mean.
    self.pool_mask = (mask_5d.astype(theano.config.floatX)
                      / numpy.prod(pool_size))

    # Fold every leading axis of `input` into one and add a singleton axis.
    trailing = input.shape[-3:]
    folded = T.shape_padright(T.prod(input.shape[:-3]), 1)
    target_shape = T.cast(
        T.join(0, folded, T.as_tensor([1, ]), trailing), 'int32')
    kernel_dims = (1, 1) + extent
    input_5d = T.reshape(input, target_shape, ndim=5)

    image_shape = (image_shape[0] * image_shape[1], 1,
                   image_shape[2], image_shape[3], image_shape[4])

    # conv3d takes axes 1 and 2 swapped, for tensors and static shapes alike.
    order = (0, 2, 1, 3, 4)
    pooled = conv3d(
        signals=input_5d.dimshuffle([0, 2, 1, 3, 4]),
        filters=self.pool_mask.transpose(0, 2, 1, 3, 4),
        signals_shape=[image_shape[axis] for axis in order],
        filters_shape=[kernel_dims[axis] for axis in order],
        border_mode='valid')
    pooled = pooled.dimshuffle([0, 2, 1, 3, 4])

    # Restore the original leading axes around the pooled trailing axes.
    restored_shape = T.join(0, input.shape[:-3], pooled.shape[-3:])
    pooled = T.reshape(pooled, restored_shape, ndim=5)

    # Extent of a 'valid' convolution along the three pooled axes.
    self.outputlen = (image_shape[2] - extent[0] + 1,
                      image_shape[3] - extent[1] + 1,
                      image_shape[4] - extent[2] + 1)
    self.output = pooled
def __init__(self, input, image_shape, pool_size, sparse_count):
    """Average-pool the last three axes of `input` with a sparse window.

    The pooling is implemented as a 'valid' conv3d with a fixed kernel whose
    taps are separated by `sparse_count` zeros along each pooled axis.

    input: symbolic tensor; its leading axes are flattened into one for the
        convolution and restored afterwards.
    image_shape: 5 entries describing `input`; entries 0 and 1 are
        multiplied to give the flattened leading size.
    pool_size: three pooling extents, one per pooled axis.
    sparse_count: number of zeros between neighbouring pooling taps.

    Sets self.pool_mask, self.outputlen and self.output.
    """
    # not implementing max pooling as of now. have to do with average pooling
    one_then_zeros = numpy.concatenate(([1], numpy.zeros(sparse_count)))
    period = len(one_then_zeros)

    # Tap pattern for one axis: ones spaced `period` apart, ending on a one.
    row = numpy.insert(numpy.tile(one_then_zeros, pool_size[0] - 1),
                       (pool_size[0] - 1) * period, 1)
    col = numpy.insert(numpy.tile(one_then_zeros, pool_size[1] - 1),
                       (pool_size[1] - 1) * period, 1)
    dep = numpy.insert(numpy.tile(one_then_zeros, pool_size[2] - 1),
                       (pool_size[2] - 1) * period, 1)
    n_row, n_col, n_dep = len(row), len(col), len(dep)

    # Outer product of the three patterns, then broadcast to a 5-D kernel.
    window = numpy.outer(numpy.outer(row, col), dep)
    window = window.reshape(n_row, n_col, n_dep)
    window = numpy.ones((1, 1, n_row, n_col, n_dep)) * window
    # Normalising by the tap count makes the convolution compute a mean.
    window = window.astype(theano.config.floatX)
    self.pool_mask = window / numpy.prod(pool_size)

    # Symbolic shape (prod(leading axes), 1, *last three axes).
    frame_dims = input.shape[-3:]
    n_flat = T.shape_padright(T.prod(input.shape[:-3]), 1)
    shape_5d = T.join(0, n_flat, T.as_tensor([1, ]), frame_dims)
    shape_5d = T.cast(shape_5d, 'int32')
    filter_dims = (1, 1, n_row, n_col, n_dep)
    flat_input = T.reshape(input, shape_5d, ndim=5)

    # Static counterpart of the symbolic shape above.
    image_shape = (image_shape[0] * image_shape[1], 1,
                   image_shape[2], image_shape[3], image_shape[4])

    # conv3d takes axes 1 and 2 swapped; permute in, then permute back.
    signals_dims = [image_shape[0], image_shape[2], image_shape[1],
                    image_shape[3], image_shape[4]]
    kernel_dims = [filter_dims[0], filter_dims[2], filter_dims[1],
                   filter_dims[3], filter_dims[4]]
    averaged = conv3d(
        signals=flat_input.dimshuffle([0, 2, 1, 3, 4]),
        filters=self.pool_mask.transpose(0, 2, 1, 3, 4),
        signals_shape=signals_dims,
        filters_shape=kernel_dims,
        border_mode='valid').dimshuffle([0, 2, 1, 3, 4])

    # Put the original leading axes back in front of the pooled axes.
    final_shape = T.join(0, input.shape[:-3], averaged.shape[-3:])
    averaged = T.reshape(averaged, final_shape, ndim=5)

    # Extent of a 'valid' convolution along the three pooled axes.
    self.outputlen = (image_shape[2] - n_row + 1,
                      image_shape[3] - n_col + 1,
                      image_shape[4] - n_dep + 1)
    self.output = averaged
def __init__(self, rng, input, signal_shape, filter_shape,
             poolsize=(2, 2, 2), stride=None, if_pool=False,
             if_hidden_pool=False, act=None, share_with=None, tied=None,
             border_mode='valid'):
    """Build a conv3d2d convolution layer with optional pooling/activation.

    rng: random generator providing `uniform`, used to initialise W.
    input: symbolic tensor passed to conv3d2d.conv3d unchanged.
    signal_shape, filter_shape: static shapes forwarded to conv3d.
    poolsize: pooling factors; all of them for 3D pooling, the first two
        for the 2D pooling branch. Also scales fan_out.
    stride: forwarded as `st` to the 2D max pooling.
    if_pool: apply 3D max pooling to the convolution output.
    if_hidden_pool: apply 2D max pooling instead (only when if_pool is
        false).
    act: 'tanh', 'sigmoid', 'relu' or 'softplus'; anything else leaves the
        output linear.
    share_with: layer whose W, b and delta variables are reused as-is.
    tied: layer whose W / W_delta are reused with axes 0 and 1 swapped.
    border_mode: forwarded to conv3d.

    Sets self.W, self.b, self.W_delta, self.b_delta, self.output,
    self.get_activation, self.params and self.deltas.
    """
    self.input = input

    if share_with:
        self.W = share_with.W
        self.b = share_with.b

        self.W_delta = share_with.W_delta
        self.b_delta = share_with.b_delta

    elif tied:
        # NOTE(review): this pattern has 4 axes while conv3d filters are
        # declared with a 5-entry filter_shape -- confirm that `tied` is
        # only ever used with 4-D weights.
        self.W = tied.W.dimshuffle(1, 0, 2, 3)
        self.b = tied.b

        self.W_delta = tied.W_delta.dimshuffle(1, 0, 2, 3)
        self.b_delta = tied.b_delta

    else:
        fan_in = np.prod(filter_shape[1:])
        poolsize_size = np.prod(poolsize) if poolsize else 1
        fan_out = (filter_shape[0] * np.prod(filter_shape[2:])
                   / poolsize_size)
        W_bound = np.sqrt(6. / (fan_in + fan_out))
        self.W = theano.shared(
            np.asarray(
                rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
                dtype=theano.config.floatX),
            borrow=True)
        b_values = np.zeros((filter_shape[0],), dtype=theano.config.floatX)
        self.b = theano.shared(value=b_values, borrow=True)

        self.W_delta = theano.shared(
            np.zeros(filter_shape, dtype=theano.config.floatX),
            borrow=True)
        # b_delta gets its own buffer: with borrow=True, handing the same
        # ndarray to two shared variables can make them alias one buffer.
        b_delta_values = np.zeros((filter_shape[0],),
                                  dtype=theano.config.floatX)
        self.b_delta = theano.shared(value=b_delta_values, borrow=True)

    # convolution
    conv_out = conv3d2d.conv3d(
        signals=input,
        filters=self.W,
        signals_shape=signal_shape,
        filters_shape=filter_shape,
        border_mode=border_mode)

    # The bias is broadcast along axis 2 of the convolution output.
    bias = self.b.dimshuffle('x', 'x', 0, 'x', 'x')

    if if_pool:
        # maxpool3d works on the last 3 dimensions, so swap axes 1 and 2
        # for pooling and swap them back afterwards.
        conv_out = conv_out.dimshuffle(0, 2, 1, 3, 4)
        pooled_out = maxpool3d.max_pool_3d(
            input=conv_out,
            ds=poolsize,
            ignore_border=True)
        tmp_out = pooled_out.dimshuffle(0, 2, 1, 3, 4)
        tmp = tmp_out + bias
    elif if_hidden_pool:
        pooled_out = downsample.max_pool_2d(
            input=conv_out,
            ds=poolsize[:2],
            st=stride,
            ignore_border=True)
        tmp = pooled_out + bias
    else:
        tmp = conv_out + bias

    if act == 'tanh':
        self.output = T.tanh(tmp)
    elif act == 'sigmoid':
        self.output = nnet.sigmoid(tmp)
    elif act == 'relu':
        # 0.5 * (x + |x|) equals max(x, 0); a constant 1e-9 is then added.
        self.output = 0.5 * (tmp + abs(tmp)) + 1e-9
    elif act == 'softplus':
        self.output = nnet.softplus(tmp)
    else:
        self.output = tmp

    # Compiled function mapping this layer's input to its output.
    self.get_activation = theano.function(
        [self.input],
        self.output,
        updates=None,
        name='get hidden activation')

    # store parameters of this layer
    self.params = [self.W, self.b]
    self.deltas = [self.W_delta, self.b_delta]
def __init__(self, rng, input, signal_shape, filter_shape,
             poolsize=(2, 2, 2), stride=None, if_pool=False,
             if_hidden_pool=False, act=None, share_with=None, tied=None,
             border_mode='valid'):
    """Create a conv3d2d layer: convolution, optional max pooling, bias, act.

    rng: random generator providing `uniform`, used to initialise W.
    input: symbolic tensor passed to conv3d2d.conv3d unchanged.
    signal_shape, filter_shape: static shapes forwarded to conv3d.
    poolsize: pooling factors; all of them for 3D pooling, the first two
        for the 2D pooling branch. Also scales fan_out.
    stride: forwarded as `st` to the 2D max pooling.
    if_pool: apply 3D max pooling to the convolution output.
    if_hidden_pool: apply 2D max pooling instead (only when if_pool is
        false).
    act: 'tanh', 'sigmoid', 'relu' or 'softplus'; anything else leaves the
        output linear.
    share_with: layer whose W, b and delta variables are reused as-is.
    tied: layer whose W / W_delta are reused with axes 0 and 1 swapped.
    border_mode: forwarded to conv3d.

    Sets self.W, self.b, self.W_delta, self.b_delta, self.output,
    self.get_activation, self.params and self.deltas.
    """
    self.input = input

    if share_with:
        # Reuse another layer's parameters and delta buffers directly.
        self.W = share_with.W
        self.b = share_with.b

        self.W_delta = share_with.W_delta
        self.b_delta = share_with.b_delta

    elif tied:
        # NOTE(review): this pattern has 4 axes while conv3d filters are
        # declared with a 5-entry filter_shape -- confirm that `tied` is
        # only ever used with 4-D weights.
        self.W = tied.W.dimshuffle(1, 0, 2, 3)
        self.b = tied.b

        self.W_delta = tied.W_delta.dimshuffle(1, 0, 2, 3)
        self.b_delta = tied.b_delta

    else:
        # Uniform initialisation in +/- sqrt(6 / (fan_in + fan_out)).
        fan_in = np.prod(filter_shape[1:])
        poolsize_size = np.prod(poolsize) if poolsize else 1
        fan_out = (filter_shape[0] * np.prod(filter_shape[2:])
                   / poolsize_size)
        W_bound = np.sqrt(6. / (fan_in + fan_out))
        W_values = np.asarray(
            rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
            dtype=theano.config.floatX)
        self.W = theano.shared(W_values, borrow=True)

        n_bias = filter_shape[0]
        self.b = theano.shared(
            value=np.zeros((n_bias,), dtype=theano.config.floatX),
            borrow=True)

        self.W_delta = theano.shared(
            np.zeros(filter_shape, dtype=theano.config.floatX),
            borrow=True)
        # A separate zero array for b_delta: with borrow=True, reusing the
        # ndarray that backs self.b can make the two variables alias it.
        self.b_delta = theano.shared(
            value=np.zeros((n_bias,), dtype=theano.config.floatX),
            borrow=True)

    # convolution
    conv_out = conv3d2d.conv3d(
        signals=input,
        filters=self.W,
        signals_shape=signal_shape,
        filters_shape=filter_shape,
        border_mode=border_mode)

    if if_pool:
        # maxpool3d works on the last 3 dimensions, so swap axes 1 and 2
        # for pooling and swap them back afterwards.
        conv_out = conv_out.dimshuffle(0, 2, 1, 3, 4)
        pooled_out = maxpool3d.max_pool_3d(
            input=conv_out,
            ds=poolsize,
            ignore_border=True)
        tmp_out = pooled_out.dimshuffle(0, 2, 1, 3, 4)
        tmp = tmp_out + self.b.dimshuffle('x', 'x', 0, 'x', 'x')
    elif if_hidden_pool:
        pooled_out = downsample.max_pool_2d(
            input=conv_out,
            ds=poolsize[:2],
            st=stride,
            ignore_border=True)
        tmp = pooled_out + self.b.dimshuffle('x', 'x', 0, 'x', 'x')
    else:
        tmp = conv_out + self.b.dimshuffle('x', 'x', 0, 'x', 'x')

    if act == 'tanh':
        self.output = T.tanh(tmp)
    elif act == 'sigmoid':
        self.output = nnet.sigmoid(tmp)
    elif act == 'relu':
        # 0.5 * (x + |x|) equals max(x, 0); a constant 1e-9 is then added.
        self.output = 0.5 * (tmp + abs(tmp)) + 1e-9
    elif act == 'softplus':
        self.output = nnet.softplus(tmp)
    else:
        self.output = tmp

    # Compiled function mapping this layer's input to its output.
    self.get_activation = theano.function(
        [self.input],
        self.output,
        updates=None,
        name='get hidden activation')

    # store parameters of this layer
    self.params = [self.W, self.b]
    self.deltas = [self.W_delta, self.b_delta]
def __init__(self, rng, input, filter_shape, image_shape, W_init, b_init,
             sparse_count, softmax=0):
    """Create a zero-interleaved (sparse) 3D convolution layer with softplus.

    rng: random generator providing `uniform`, used when W_init is None.
    input: symbolic 5-D tensor; axes 1 and 2 are swapped before conv3d.
    filter_shape: 5 entries (out maps, in maps, k0, k1, k2) of the dense
        kernel; it is expanded here to the zero-interleaved size.
    image_shape: 5 entries describing `input`; entry 1 must equal
        filter_shape[1].
    W_init, b_init: initial values for W and b, or None to create them.
    sparse_count: number of zeros inserted between neighbouring kernel taps
        along each of the three kernel axes.
    softmax: when truthy, also build self.p_y_given_x and self.y_pred.

    Sets self.W, self.b, self.Wmask, self.bmask, self.params, self.masks,
    self.outputlen, self.num_points and self.output.
    """
    assert image_shape[1] == filter_shape[1], \
        "image_shape[1] must equal filter_shape[1]"
    self.input = input

    # Uniform bound computed from the dense (un-expanded) filter shape.
    fan_in = numpy.prod(filter_shape[1:])
    fan_out = filter_shape[0] * numpy.prod(filter_shape[2:])
    W_bound = numpy.sqrt(6. / (fan_in + fan_out))

    # A single tap followed by `sparse_count` zeros.
    oneZeros = numpy.concatenate(([1], numpy.zeros(sparse_count)))

    def spaced_ones(count):
        # `count` ones with `sparse_count` zeros between neighbours.
        return numpy.insert(numpy.tile(oneZeros, count - 1),
                            (count - 1) * len(oneZeros), 1)

    x, y, z = (spaced_ones(filter_shape[axis]) for axis in (2, 3, 4))
    # Separable 3-D mask: 1 where all three axis patterns are 1.
    mask = numpy.outer(numpy.outer(x, y), z).reshape(len(x), len(y), len(z))

    # Expanded filter shape; each kernel axis now has the same length as
    # the matching axis pattern.
    step = 1 + sparse_count
    filter_shape = (filter_shape[0],
                    filter_shape[1],
                    step * filter_shape[2] - sparse_count,
                    step * filter_shape[3] - sparse_count,
                    step * filter_shape[4] - sparse_count)
    self.Wmask = (numpy.ones(filter_shape) * mask).astype(
        theano.config.floatX)

    # Identity test rather than `!= None`: with an ndarray the comparison
    # is element-wise and cannot be used as a condition.
    if W_init is not None:
        # A supplied W_init is used as given; the mask is not applied to it.
        W_values = W_init
    else:
        W_values = numpy.asarray(
            rng.uniform(low=-W_bound, high=W_bound, size=filter_shape)
            * self.Wmask,
            dtype=theano.config.floatX)
    self.W = theano.shared(value=W_values, borrow=True)

    if b_init is not None:
        b_values = b_init
    else:
        b_values = numpy.zeros((filter_shape[0],),
                               dtype=theano.config.floatX)
    self.b = theano.shared(value=b_values, borrow=True)
    self.bmask = numpy.ones((filter_shape[0],), dtype=theano.config.floatX)

    # conv3d takes axes 1 and 2 swapped relative to this layer's layout, so
    # tensors and their static shapes are permuted in and the result is
    # permuted back.
    conv_out = conv3d(
        signals=input.dimshuffle([0, 2, 1, 3, 4]),
        filters=self.W.dimshuffle([0, 2, 1, 3, 4]),
        signals_shape=[image_shape[i] for i in [0, 2, 1, 3, 4]],
        filters_shape=[filter_shape[i] for i in [0, 2, 1, 3, 4]],
        border_mode='valid').dimshuffle([0, 2, 1, 3, 4])

    # One bias per output map, broadcast over every other axis.
    conv_out += self.b.dimshuffle('x', 0, 'x', 'x', 'x')

    # Extent of a 'valid' convolution along the three convolved axes.
    self.outputlen = (image_shape[2] - filter_shape[2] + 1,
                      image_shape[3] - filter_shape[3] + 1,
                      image_shape[4] - filter_shape[4] + 1)
    self.output = T.nnet.softplus(conv_out)
    self.params = [self.W, self.b]
    self.masks = [self.Wmask, self.bmask]
    self.num_points = T.prod(self.outputlen)

    # initial shape = 1,3,img_shape
    # NOTE(review): the reshape below drops axis 0, so it presumably
    # assumes a batch size of 1 -- confirm against callers.
    if softmax:
        # One row per output position, one column per output map.
        out = conv_out.reshape(
            [conv_out.shape[1], self.num_points]).dimshuffle(1, 0)
        self.p_y_given_x = T.nnet.softmax(out)
        self.y_pred = T.argmax(self.p_y_given_x, axis=1)
def __init__(self, input, n_in_maps, n_out_maps, kernel_shape, video_shape,
             batch_size, numpy_rng, activation, W=None, b=None,
             border_mode='valid'):
    """Create a 3D convolution layer together with zeroed delta buffers.

    input: symbolic 5-D tensor; it is dimshuffled with [0, 2, 1, 3, 4]
        and declared to conv3d as (batch_size, frames, n_in_maps, h, w).
    n_in_maps / n_out_maps: number of input / output feature maps.
    kernel_shape: (frames, height, width); tuple or list.
    video_shape: (frames, height, width)
    batch_size: first entry of the signal shape handed to conv3d.
    numpy_rng: source of the normal samples used to initialise W.
    activation: callable applied to the biased convolution output.
    W, b: optional existing parameters, used directly as self.W / self.b
        (b must provide get_value); created here when None.
    border_mode: forwarded to conv3d.
    W_shape: (out, in, kern_frames, kern_height, kern_width)

    Sets self.W, self.b, self.delta_W, self.delta_b, self.params,
    self.delta_params and self.output, plus one attribute per constructor
    argument.
    """
    # Mirror all constructor arguments onto the instance. This has to run
    # before any other local is bound, otherwise that local is copied too.
    self.__dict__.update(locals())
    del self.self
    self.activation = activation

    # list() makes the documented tuple kernel_shape work; a bare
    # list + tuple concatenation raises TypeError.
    param_shape = [n_out_maps, n_in_maps] + list(kernel_shape)
    uses_rectifier = activation in (relu, softplus)

    # init W
    if W is None:
        # fan in: filter time x filter height x filter width x input maps
        fan_in = prod(kernel_shape) * n_in_maps
        norm_scale = 0.01 if uses_rectifier else 2. * sqrt(1. / fan_in)
        W_val = _asarray(
            numpy_rng.normal(loc=0, scale=norm_scale, size=param_shape),
            dtype=floatX)
        W = shared(value=W_val, borrow=True, name='W')
    self.W = W

    # init bias: ones for relu/softplus, zeros otherwise
    if b is None:
        make_bias = ones if uses_rectifier else zeros
        b = shared(make_bias((n_out_maps,), dtype=floatX),
                   name="b", borrow=True)
    self.b = b

    # 3D convolution; dimshuffle: last 3 dimensions must be (in, h, w)
    n_fr, h, w = video_shape
    n_fr_k, h_k, w_k = kernel_shape
    # NOTE(review): W is allocated as (out, in, frames, h, w) but is passed
    # with filters_shape (out, frames, in, h, w) without a dimshuffle --
    # confirm this layout is intended.
    out = conv3d(
        signals=input.dimshuffle([0, 2, 1, 3, 4]),
        filters=self.W,
        signals_shape=(batch_size, n_fr, n_in_maps, h, w),
        filters_shape=(n_out_maps, n_fr_k, n_in_maps, h_k, w_k),
        border_mode=border_mode).dimshuffle([0, 2, 1, 3, 4])

    # One bias per output map, broadcast over every other axis.
    out += self.b.dimshuffle('x', 0, 'x', 'x', 'x')

    # Zero-initialised buffers with the same shapes as W and b.
    self.delta_W = shared(value=zeros(param_shape, dtype=floatX),
                          name='delta_W')
    self.delta_b = shared(
        value=zeros_like(self.b.get_value(borrow=True), dtype=floatX),
        name='delta_b')

    self.output = activation(out)

    self.params = [self.W, self.b]
    self.delta_params = [self.delta_W, self.delta_b]