def dynamic_kmaxPooling(self, curConv_out, k):
    neighborsForPooling = TSN.images2neibs(ten4=curConv_out,
                                           neib_shape=(1, curConv_out.shape[3]),
                                           mode='ignore_borders')
    self.neighbors = neighborsForPooling

    neighborsArgSorted = T.argsort(neighborsForPooling, axis=1)
    kNeighborsArg = neighborsArgSorted[:, -k:]
    #self.bestK = kNeighborsArg
    kNeighborsArgSorted = T.sort(kNeighborsArg, axis=1)

    ii = T.repeat(T.arange(neighborsForPooling.shape[0]), k)
    jj = kNeighborsArgSorted.flatten()
    pooledkmaxTmp = neighborsForPooling[ii, jj]
    new_shape = T.cast(T.join(0,
                              T.as_tensor([neighborsForPooling.shape[0]]),
                              T.as_tensor([k])),
                       'int64')
    pooledkmax_matrix = T.reshape(pooledkmaxTmp, new_shape, ndim=2)

    rightWidth = self.unifiedWidth - k
    right_padding = T.zeros((neighborsForPooling.shape[0], rightWidth),
                            dtype=theano.config.floatX)
    matrix_padded = T.concatenate([pooledkmax_matrix, right_padding], axis=1)

    # recover tensor form
    new_shape = T.cast(T.join(0,
                              curConv_out.shape[:-2],
                              T.as_tensor([curConv_out.shape[2]]),
                              T.as_tensor([self.unifiedWidth])),
                       'int64')
    curPooled_out = T.reshape(matrix_padded, new_shape, ndim=4)
    return curPooled_out
def unpool_switch_2d(input, ds, st=None,
                     index_type='flattened', index_scope='local',
                     original_input_shape=None):
    if input.ndim < 3:
        raise NotImplementedError('unpool_switch_2d requires a dimension >= 3')
    if input.ndim == 4:
        op = UnpoolSwitch(ds, st=st,
                          index_type=index_type, index_scope=index_scope,
                          original_input_shape=original_input_shape)
        output = op(input)
        return output

    # extract image dimensions
    img_shape = input.shape[-3:]

    # count the number of "leading" dimensions, store as dmatrix
    batch_size = T.prod(input.shape[:-3])
    batch_size = T.shape_padright(batch_size, 1)

    # store as 4D tensor with shape: (batch_size,1,height,width)
    new_shape = T.cast(T.join(0, batch_size, img_shape), 'int64')
    input_4D = T.reshape(input, new_shape, ndim=4)

    # downsample mini-batch of images
    op = UnpoolSwitch(ds, st=st,
                      index_type=index_type, index_scope=index_scope,
                      original_input_shape=original_input_shape)
    output = op(input_4D)

    # restore to original shape
    outshp = T.join(0, input.shape[:-2], output.shape[-2:])
    return T.reshape(output, outshp, ndim=input.ndim)
def max_pool_2d(input, ds, ignore_border=False):
    """
    Takes as input a N-D tensor, where N >= 2. It downscales the input image
    by the specified factor, by keeping only the maximum value of
    non-overlapping patches of size (ds[0],ds[1])

    :type input: N-D theano tensor of input images.
    :param input: input images. Max pooling will be done over the 2 last
        dimensions.
    :type ds: tuple of length 2
    :param ds: factor by which to downscale. (2,2) will halve the image in
        each dimension.
    :param ignore_border: boolean value. When True, (5,5) input with ds=(2,2)
        will generate a (2,2) output. (3,3) otherwise.
    """
    if input.ndim < 2:
        raise NotImplementedError("max_pool_2d requires a dimension >= 2")

    # extract image dimensions
    img_shape = input.shape[-2:]

    # count the number of "leading" dimensions, store as dmatrix
    batch_size = tensor.prod(input.shape[:-2])
    batch_size = tensor.shape_padright(batch_size, 1)

    # store as 4D tensor with shape: (batch_size,1,height,width)
    new_shape = tensor.cast(
        tensor.join(0, batch_size, tensor.as_tensor([1]), img_shape), "int64")
    input_4D = tensor.reshape(input, new_shape, ndim=4)

    # downsample mini-batch of images
    op = DownsampleFactorMax(ds, ignore_border)
    output = op(input_4D)

    # restore to original shape
    outshp = tensor.join(0, input.shape[:-2], output.shape[-2:])
    return tensor.reshape(output, outshp, ndim=input.ndim)
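# Usage sketch for max_pool_2d above (an illustration, not from the original
# source): assumes Theano is installed and DownsampleFactorMax is in scope,
# e.g. from theano.tensor.signal.downsample; shapes are illustrative only.
import numpy
import theano
from theano import tensor

images = tensor.tensor3('images')  # e.g. (channels, height, width)
pooled = max_pool_2d(images, ds=(2, 2), ignore_border=True)
f = theano.function([images], pooled)
out = f(numpy.random.rand(3, 6, 8).astype(theano.config.floatX))
# out.shape == (3, 3, 4): only the two trailing dimensions are downscaled;
# the leading dimension was temporarily folded into the batch axis.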
def maxpool_3D(input, ds, ignore_border=False):
    # input.dimshuffle(0, 2, 1, 3, 4)  # convert to make video in back.
    # no need to reshuffle.
    if input.ndim < 3:
        raise NotImplementedError('max_pool_3d requires a dimension >= 3')

    # extract nr dimensions
    vid_dim = input.ndim
    # max pool in two different steps, so we can use the 2d implementation of
    # downsamplefactormax. First maxpool frames as usual.
    # Then maxpool the time dimension. Shift the time dimension to the third
    # position, so rows and cols are in the back

    # extract dimensions
    frame_shape = input.shape[-2:]

    # count the number of "leading" dimensions, store as dmatrix
    batch_size = T.prod(input.shape[:-2])
    batch_size = T.shape_padright(batch_size, 1)

    # store as 4D tensor with shape: (batch_size,1,height,width)
    new_shape = T.cast(T.join(0, batch_size, T.as_tensor([1, ]), frame_shape),
                       'int32')
    input_4D = T.reshape(input, new_shape, ndim=4)

    # downsample mini-batch of videos in rows and cols;
    # the second and third elements of ds are for height and width
    op = DownsampleFactorMax((ds[1], ds[2]), ignore_border)
    output = op(input_4D)

    # restore to original shape
    outshape = T.join(0, input.shape[:-2], output.shape[-2:])
    out = T.reshape(output, outshape, ndim=input.ndim)

    # now maxpool time
    # output (time, rows, cols), reshape so that time is in the back
    shufl = (list(range(vid_dim - 3)) + [vid_dim - 2] + [vid_dim - 1] +
             [vid_dim - 3])
    input_time = out.dimshuffle(shufl)

    # reset dimensions
    vid_shape = input_time.shape[-2:]

    # count the number of "leading" dimensions, store as dmatrix
    batch_size = T.prod(input_time.shape[:-2])
    batch_size = T.shape_padright(batch_size, 1)

    # store as 4D tensor with shape: (batch_size,1,width,time)
    new_shape = T.cast(T.join(0, batch_size, T.as_tensor([1, ]), vid_shape),
                       'int32')
    input_4D_time = T.reshape(input_time, new_shape, ndim=4)

    # downsample mini-batch of videos in time;
    # here the time dimension is downsampled
    op = DownsampleFactorMax((1, ds[0]), ignore_border)
    outtime = op(input_4D_time)

    # restore to original shape (xxx, rows, cols, time)
    outshape = T.join(0, input_time.shape[:-2], outtime.shape[-2:])
    shufl = (list(range(vid_dim - 3)) + [vid_dim - 1] + [vid_dim - 3] +
             [vid_dim - 2])
    #rval = T.reshape(outtime, outshape, ndim=input.ndim).dimshuffle(shufl)
    return T.reshape(outtime, outshape, ndim=input.ndim).dimshuffle(shufl)
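# Shape walk-through for maxpool_3D above (a sketch; assumes a 5-D video
# tensor laid out as (batch, channel, time, rows, cols)): with ds=(2, 2, 2),
# an input of shape (8, 3, 10, 32, 32) is first pooled over rows/cols via the
# 4-D reshape, giving (8, 3, 10, 16, 16); time is then shuffled to the back,
# pooled with (1, ds[0]), and shuffled forward again, giving (8, 3, 5, 16, 16).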
def __init__(self, model, shrinkable=False, nb_neurons_to_add=1):
    super(GrowiRBM, self).__init__()
    self.model = model
    self.shrinkable = shrinkable
    self.nb_neurons_to_add = nb_neurons_to_add
    self.maxZ = theano.shared(np.array(0, dtype="int64"))
    self.grad_W_new_neurons = theano.shared(
        np.zeros((nb_neurons_to_add, model.input_size),
                 dtype=theano.config.floatX))

    zmask_start = model.sample_zmask_given_v(model.CD.chain_start)
    zmask_end = model.sample_zmask_given_v(model.CD.chain_end)
    z_start = T.sum(zmask_start, axis=1)
    z_end = T.sum(zmask_end, axis=1)
    max_Zs = T.maximum(z_start, z_end)
    maxZ = max_Zs.max()

    # temporarily append rows of zeros to W and b, take the gradient of the
    # CD cost w.r.t. the enlarged W, then restore the original parameters
    W_bak = model.W
    b_bak = model.b
    model.W = T.join(0, model.W,
                     T.zeros((nb_neurons_to_add, model.input_size),
                             dtype=theano.config.floatX))
    model.b = T.join(0, model.b,
                     T.zeros(nb_neurons_to_add, dtype=theano.config.floatX))
    cost = (model.free_energy(model.CD.chain_start) -
            model.free_energy(model.CD.chain_end))
    grad_W_new_neurons = theano.grad(T.mean(cost), model.W)[-nb_neurons_to_add:]
    model.W = W_bak
    model.b = b_bak

    # Will be part of the updates passed to the Theano function `learn` of the
    # trainer. Note: all updates are applied simultaneously, i.e. params
    # haven't been updated yet.
    self.updates[self.maxZ] = T.cast(maxZ, "int64")
    self.updates[self.grad_W_new_neurons] = grad_W_new_neurons
def apply(self, application, *args, **kwargs):
    # extra_ndim is a mandatory parameter, but in order not to
    # confuse it with positional inputs, it has to be extracted from
    # **kwargs
    extra_ndim = kwargs.get("extra_ndim", 0)

    inputs = dict(zip(application.inputs, args))
    inputs.update(dict_subset(kwargs, application.inputs, must_have=False))
    reshaped_inputs = inputs
    # To prevent pollution of the computation graph with no-ops
    if extra_ndim > 0:
        for name, input_ in inputs.items():
            shape, ndim = input_.shape, input_.ndim
            # Remember extra_dims for reshaping the outputs correctly.
            # Does not matter from which input, since we assume the
            # extra dimensions match for all inputs.
            extra_dims = shape[:extra_ndim]
            new_first_dim = tensor.prod(shape[:extra_ndim + 1])
            new_shape = tensor.join(0, new_first_dim[None],
                                    shape[extra_ndim + 1:])
            reshaped_inputs[name] = input_.reshape(new_shape,
                                                   ndim=ndim - extra_ndim)
    outputs = wrapped.__get__(self, None)(**reshaped_inputs)
    if extra_ndim == 0:
        return outputs
    reshaped_outputs = []
    for output in pack(outputs):
        shape, ndim = output.shape, output.ndim
        new_shape = tensor.join(
            0, extra_dims, (shape[0] // tensor.prod(extra_dims))[None],
            shape[1:])
        reshaped_outputs.append(output.reshape(new_shape,
                                               ndim=ndim + extra_ndim))
    return reshaped_outputs
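# Shape intuition for the reshape trick above (a numpy sketch with
# illustrative shapes): the extra_ndim leading dimensions are folded into the
# first axis before the wrapped application runs, and unfolded afterwards.
import numpy

x = numpy.zeros((2, 3, 5))                   # extra_ndim = 1: fold (2, 3) -> 6
flat = x.reshape((2 * 3, 5))                 # what the wrapped application sees
out = flat                                   # stand-in for the application's output
back = out.reshape((2, 3) + out.shape[1:])   # restore the extra leading dimension
# back.shape == (2, 3, 5)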
def test_gpujoin_gpualloc():
    a = T.fmatrix('a')
    a_val = numpy.asarray(numpy.random.rand(4, 5), dtype='float32')
    b = T.fmatrix('b')
    b_val = numpy.asarray(numpy.random.rand(3, 5), dtype='float32')

    f = theano.function([a, b], T.join(0, T.zeros_like(a), T.ones_like(b)) + 4,
                        mode=mode_without_gpu)
    f_gpu = theano.function([a, b], T.join(0, T.zeros_like(a), T.ones_like(b)),
                            mode=mode_with_gpu)
    f_gpu2 = theano.function([a, b],
                             T.join(0, T.zeros_like(a), T.ones_like(b)) + 4,
                             mode=mode_with_gpu)

    assert sum([node.op == T.alloc for node in f.maker.env.toposort()]) == 2
    assert sum([node.op == T.join for node in f.maker.env.toposort()]) == 1
    assert sum([node.op == B.gpu_alloc
                for node in f_gpu.maker.env.toposort()]) == 2
    assert sum([node.op == B.gpu_join
                for node in f_gpu.maker.env.toposort()]) == 1
    assert sum([node.op == B.gpu_alloc
                for node in f_gpu2.maker.env.toposort()]) == 2
    assert sum([node.op == B.gpu_join
                for node in f_gpu2.maker.env.toposort()]) == 1
    assert numpy.allclose(f(a_val, b_val), f_gpu2(a_val, b_val))
def max_pool_2d(input, ds, ignore_border=False, st=None, padding=(0, 0),
                mode='max'):
    """
    Takes as input a N-D tensor, where N >= 2. It downscales the input image
    by the specified factor, by keeping only the maximum value of
    non-overlapping patches of size (ds[0],ds[1])

    :type input: N-D theano tensor of input images.
    :param input: input images. Max pooling will be done over the 2 last
        dimensions.
    :type ds: tuple of length 2
    :param ds: factor by which to downscale (vertical ds, horizontal ds).
        (2,2) will halve the image in each dimension.
    :type ignore_border: bool
    :param ignore_border: When True, (5,5) input with ds=(2,2)
        will generate a (2,2) output. (3,3) otherwise.
    :type st: tuple of length 2
    :param st: stride size, which is the number of shifts
        over rows/cols to get the next pool region.
        If st is None, it is considered equal to ds
        (no overlap on pooling regions).
    :type padding: tuple of two ints
    :param padding: (pad_h, pad_w), pad zeros to extend beyond four borders
        of the images, pad_h is the size of the top and bottom margins,
        and pad_w is the size of the left and right margins.
    :type mode: string
    :param mode: 'max', 'average_inc_pad' or 'average_exc_pad'.
        Operation executed on each window. `max` always excludes the padding
        in the computation. `average` gives you the choice to include or
        exclude it.
    """
    if input.ndim < 2:
        raise NotImplementedError('max_pool_2d requires a dimension >= 2')
    if input.ndim == 4:
        op = DownsampleFactorMax(ds, ignore_border, st=st, padding=padding,
                                 mode=mode)
        output = op(input)
        return output

    # extract image dimensions
    img_shape = input.shape[-2:]

    # count the number of "leading" dimensions, store as dmatrix
    batch_size = tensor.prod(input.shape[:-2])
    batch_size = tensor.shape_padright(batch_size, 1)

    # store as 4D tensor with shape: (batch_size,1,height,width)
    new_shape = tensor.cast(
        tensor.join(0, batch_size, tensor.as_tensor([1]), img_shape), 'int64')
    input_4D = tensor.reshape(input, new_shape, ndim=4)

    # downsample mini-batch of images
    op = DownsampleFactorMax(ds, ignore_border, st=st, padding=padding,
                             mode=mode)
    output = op(input_4D)

    # restore to original shape
    outshp = tensor.join(0, input.shape[:-2], output.shape[-2:])
    return tensor.reshape(output, outshp, ndim=input.ndim)
def test_opt_gpujoin_onlyajoin():
    # from a bug in normal sampling
    _a = numpy.asarray([[1, 2], [3, 4]], dtype='float32')
    _b = numpy.asarray([[5, 6, 7], [8, 9, 10]], dtype='float32')
    a = cuda.shared_constructor(_a)
    b = cuda.shared_constructor(_b)

    c = tensor.join(1, a, b)

    f = theano.function([], c, mode=mode_with_gpu)
    f()

    graph_nodes = f.maker.fgraph.toposort()
    assert isinstance(graph_nodes[-1].op, cuda.HostFromGpu)
    assert isinstance(graph_nodes[-2].op, cuda.GpuJoin)
    assert numpy.all(f() == numpy.concatenate([_a, _b], axis=1))

    # test mixed dtype
    _b = numpy.asarray([[5, 6, 7], [8, 9, 10]], dtype='float64')
    b = theano.tensor.constant(_b)

    c = tensor.join(1, a, b)

    f = theano.function([], c, mode=mode_with_gpu)
    f()

    graph_nodes = f.maker.fgraph.toposort()
    assert isinstance(graph_nodes[-1].op, theano.tensor.Join)
    assert numpy.all(f() == numpy.concatenate([_a, _b], axis=1))
def max_pool_switch_2d(input, ds, ignore_border=None, st=None, padding=(0, 0),
                       index_type='flattened', index_scope='local'):
    if input.ndim < 2:
        raise NotImplementedError('max_pool_switch_2d requires a dimension >= 2')
    if ignore_border is None:
        ignore_border = False
    if input.ndim == 4:
        op = MaxPoolSwitch(ds, ignore_border, st=st, padding=padding,
                           index_type=index_type, index_scope=index_scope)
        output = op(input)
        return output

    # extract image dimensions
    img_shape = input.shape[-2:]

    # count the number of "leading" dimensions, store as dmatrix
    batch_size = T.prod(input.shape[:-2])
    batch_size = T.shape_padright(batch_size, 1)

    # store as 4D tensor with shape: (batch_size,1,height,width)
    new_shape = T.cast(T.join(0, batch_size, T.as_tensor([1]), img_shape),
                       'int64')
    input_4D = T.reshape(input, new_shape, ndim=4)

    # downsample mini-batch of images
    op = MaxPoolSwitch(ds, ignore_border, st=st, padding=padding,
                       index_type=index_type, index_scope=index_scope)
    output = op(input_4D)

    # restore to original shape
    outshp = T.join(0, input.shape[:-2], output.shape[-2:])
    return T.reshape(output, outshp, ndim=input.ndim)
def pad_dims(input, leftdims, rightdims):
    """Reshapes the input to a (leftdims + rightdims) tensor

    This helper function is used to convert pooling inputs with arbitrary
    non-pooling dimensions to the correct number of dimensions for the
    GPU pooling ops.

    This reduces or expands the number of leading (non-pooling) dimensions of
    the input to exactly `leftdims`, by adding extra dimensions on the left or
    by combining some existing dimensions on the left of the input.

    Use `unpad_dims` to reshape back to the original dimensions.

    Examples
    --------
    Given input of shape (3, 5, 7), ``pad_dims(input, 2, 2)``
    adds a singleton dimension and reshapes to (1, 3, 5, 7).
    Given that output from pad_dims, ``unpad_dims(output, input, 2, 2)``
    reshapes back to (3, 5, 7).

    Given input of shape (3, 5, 7, 9), ``pad_dims(input, 2, 2)``
    does not reshape and returns output with shape (3, 5, 7, 9).

    Given input of shape (3, 5, 7, 9, 11), ``pad_dims(input, 2, 2)``
    combines the first two dimensions and reshapes to (15, 7, 9, 11).

    Given input of shape (3, 5, 7, 9), ``pad_dims(input, 2, 3)``
    adds a singleton dimension and reshapes to (1, 3, 5, 7, 9).
    """
    assert input.ndim >= rightdims

    if input.ndim == (leftdims + rightdims):
        return input

    # extract image dimensions
    img_shape = input.shape[-rightdims:]

    non_pool_ndim = input.ndim - rightdims
    if non_pool_ndim < leftdims:
        # too few dimensions, pad on the left
        dummy_dims = tensor.as_tensor([1] * (leftdims - non_pool_ndim))
        new_shape = tensor.join(0, dummy_dims,
                                input.shape[:non_pool_ndim],
                                img_shape)
    else:
        # too many dimensions, combine the leading dimensions
        batched_ndim = non_pool_ndim - leftdims + 1
        batch_size = tensor.prod(input.shape[:batched_ndim])
        # convert to a vector for tensor.join
        batch_size = tensor.shape_padright(batch_size, 1)
        new_shape = tensor.join(0, batch_size,
                                input.shape[batched_ndim:non_pool_ndim],
                                img_shape)

    # store in the required shape
    new_shape = tensor.cast(new_shape, 'int64')
    input_ND = GpuReshape(leftdims + rightdims)(input, new_shape)
    return input_ND
def cost(self, Y, Y_hat):
    r = self.r
    w = 0.05  # threshold below which targets are rescaled by r
    i = T.le(Y, w)
    j = T.eq(i, 0)
    z = T.join(0, Y[i] / r, Y[j])
    z_hat = T.join(0, Y_hat[i] / r, Y_hat[j])
    return super(linear_mlp_bayesian_cost, self).cost(z, z_hat)
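# Numeric intuition for the thresholded cost above (a numpy sketch): entries
# of Y at or below w are divided by r; the two groups are then rejoined. The
# join reorders elements, which is harmless assuming the parent cost is
# elementwise, since Y and Y_hat are reordered consistently.
import numpy

Y = numpy.array([0.01, 0.5, 0.03])
w, r = 0.05, 10.0
i = Y <= w
z = numpy.concatenate([Y[i] / r, Y[~i]])
# z == array([0.001, 0.003, 0.5])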
def max_pool_3d(input, ds, ignore_border=False):
    """
    Takes as input a N-D tensor, where N >= 3. It downscales the input video
    by the specified factor, by keeping only the maximum value of
    non-overlapping patches of size (ds[0],ds[1],ds[2]) (time, height, width)

    :type input: N-D theano tensor of input images.
    :param input: input images. Max pooling will be done over the 3 last
        dimensions.
    :type ds: tuple of length 3
    :param ds: factor by which to downscale. (2,2,2) will halve the video in
        each dimension.
    :param ignore_border: boolean value. When True, (5,5,5) input with
        ds=(2,2,2) will generate a (2,2,2) output. (3,3,3) otherwise.
    """
    if input.ndim < 3:
        raise NotImplementedError('max_pool_3d requires a dimension >= 3')

    vid_dim = input.ndim

    # Maxpool frames
    frame_shape = input.shape[-2:]

    # count the number of "leading" dimensions, store as dmatrix
    batch_size = T.prod(input.shape[:-2])
    batch_size = T.shape_padright(batch_size, 1)

    new_shape = T.cast(T.join(0, batch_size, T.as_tensor([1, ]), frame_shape),
                       'int32')
    input_4D = T.reshape(input, new_shape, ndim=4)

    # downsample mini-batch of videos in rows and cols
    op = DownsampleFactorMax((ds[1], ds[2]), ignore_border)
    output = op(input_4D)

    # restore to original shape
    outshape = T.join(0, input.shape[:-2], output.shape[-2:])
    out = T.reshape(output, outshape, ndim=input.ndim)

    # Maxpool time
    # output (time, rows, cols), reshape so that time is in the back
    shufl = (list(range(vid_dim - 4)) + list(range(vid_dim - 3, vid_dim)) +
             [vid_dim - 4])
    input_time = out.dimshuffle(shufl)

    # reset dimensions
    vid_shape = input_time.shape[-2:]

    # count the number of "leading" dimensions, store as dmatrix
    batch_size = T.prod(input_time.shape[:-2])
    batch_size = T.shape_padright(batch_size, 1)

    # store as 4D tensor with shape: (batch_size,1,width,time)
    new_shape = T.cast(T.join(0, batch_size, T.as_tensor([1, ]), vid_shape),
                       'int32')
    input_4D_time = T.reshape(input_time, new_shape, ndim=4)

    # downsample mini-batch of videos in time
    op = DownsampleFactorMax((1, ds[0]), ignore_border)
    outtime = op(input_4D_time)

    # restore to original shape (xxx, rows, cols, time)
    outshape = T.join(0, input_time.shape[:-2], outtime.shape[-2:])
    shufl = (list(range(vid_dim - 4)) + [vid_dim - 1] +
             list(range(vid_dim - 4, vid_dim - 1)))
    #shufl = (list(range(vid_dim-3)) + [vid_dim-1]+[vid_dim-3]+[vid_dim-2])
    return T.reshape(outtime, outshape, ndim=input.ndim).dimshuffle(shufl)
def max_pool_2d(input, ds, ignore_border=False, st=None):
    """
    Takes as input a N-D tensor, where N >= 2. It downscales the input image
    by the specified factor, by keeping only the maximum value of
    non-overlapping patches of size (ds[0],ds[1])

    :type input: N-D theano tensor of input images.
    :param input: input images. Max pooling will be done over the 2 last
        dimensions.
    :type ds: tuple of length 2
    :param ds: factor by which to downscale (vertical ds, horizontal ds).
        (2,2) will halve the image in each dimension.
    :type ignore_border: bool
    :param ignore_border: When True, (5,5) input with ds=(2,2)
        will generate a (2,2) output. (3,3) otherwise.
    :type st: tuple of length 2
    :param st: stride size, which is the number of shifts
        over rows/cols to get the next pool region.
        If st is None, it is considered equal to ds
        (no overlap on pooling regions).
    """
    if input.ndim < 2:
        raise NotImplementedError('max_pool_2d requires a dimension >= 2')
    if input.ndim == 4:
        op = DownsampleFactorMax(ds, ignore_border, st=st)
        output = op(input)
        return output

    # extract image dimensions
    img_shape = input.shape[-2:]

    # count the number of "leading" dimensions, store as dmatrix
    batch_size = tensor.prod(input.shape[:-2])
    batch_size = tensor.shape_padright(batch_size, 1)

    # store as 4D tensor with shape: (batch_size,1,height,width)
    new_shape = tensor.cast(
        tensor.join(0, batch_size, tensor.as_tensor([1]), img_shape), 'int64')
    input_4D = tensor.reshape(input, new_shape, ndim=4)

    # downsample mini-batch of images
    op = DownsampleFactorMax(ds, ignore_border, st=st)
    output = op(input_4D)

    # restore to original shape
    outshp = tensor.join(0, input.shape[:-2], output.shape[-2:])
    return tensor.reshape(output, outshp, ndim=input.ndim)
def one_lstm_step_wpd(x_t, extra_x_in, h_tm1, c_tm1, a01_tm1,
                      W_xi, W_hi, W_xf, W_hf, W_xc, W_hc, W_xo, W_ho,
                      W01_inattend, att_b2):
    #########################################
    # For Attention
    #########################################
    # 0D - ch-time-freq
    #att0_e_tl = T.tanh(T.dot(T.join(0, c_tm1, T.join(0, a0_tm1, x_t)), W0_inattend))
    #att0_a_tl = T.exp(att0_e_tl)/(T.exp(att0_e_tl)).sum(0, keepdims=True)
    #att_c_t = att0_a_tl*x_t

    # 0D2 - ch-time
    e = T.tanh(T.dot(T.join(0, extra_x_in,
                            T.join(0, c_tm1, T.join(0, a01_tm1, x_t))),
                     W01_inattend) + att_b2)
    att01_a_tl = T.exp(e)/(T.exp(e)).sum(0, keepdims=True)
    att01_c_t = T.extra_ops.repeat(att01_a_tl, 40, axis=0)*x_t  # (7*5*40)*(7*5*40)
    att_c_t = att01_c_t
    if draw is not None:
        att01_c_t = theano.printing.Print('att01_c_t')(att01_c_t)
    #e = T.tanh(T.dot(T.join(0, c_tm1, T.join(0, a02_tm1, att01_c_t)), W02_inattend))
    #att02_a_tl = T.exp(e)/(T.exp(e)).sum(0, keepdims=True)  # 40*40
    #att_c_t = att02_a_tl*att01_c_t

    # 1D - timeframe
    #att1_e_tl = T.tanh(T.dot(T.join(0, c_tm1, T.join(0, a1_tm1, x_t)), W1_inattend))
    #att1_a_tl = T.exp(att1_e_tl)/(T.exp(att1_e_tl)).sum(0, keepdims=True)
    #att1_c_t = T.dot(att1_a_tl, x_t.reshape((7, 5*40))).flatten()  # (1,7) * ((7,5*40)) => (5*40)

    # 2D - channel
    #att2_e_tl = T.tanh(T.dot(T.join(0, c_tm1, T.join(0, a2_tm1, att1_c_t)), W2_inattend))
    #att2_a_tl = T.exp(att2_e_tl)/(T.exp(att2_e_tl)).sum(0, keepdims=True)
    #att2_c_t = T.dot(att2_a_tl, att1_c_t.reshape((5, 40))).flatten()  # (1,5) * ((5,40)) => (1,40)

    # 3D - frequency
    #att3_e_tl = T.tanh(T.dot(T.join(0, c_tm1, T.join(0, a3_tm1, att2_c_t)), W3_inattend))
    #att3_a_tl = T.exp(att3_e_tl)/(T.exp(att3_e_tl)).sum(0, keepdims=True)  # 40*40
    #att_c_t = att3_a_tl*att2_c_t

    #########################################
    # For LSTM
    #########################################
    x_t = att_c_t  # rename
    i_t = T.nnet.sigmoid(theano.dot(x_t, W_xi) + theano.dot(h_tm1, W_hi))
    f_t = T.nnet.sigmoid(theano.dot(x_t, W_xf) + theano.dot(h_tm1, W_hf))
    c_t = f_t * c_tm1 + i_t * T.tanh(theano.dot(x_t, W_xc) +
                                     theano.dot(h_tm1, W_hc))
    o_t = T.nnet.sigmoid(theano.dot(x_t, W_xo) + theano.dot(h_tm1, W_ho))
    h_t = o_t * T.tanh(c_t)
    return [h_t, c_t, att01_a_tl]
def max_pool_3d(input, ds, ignore_border=False):
    # only accept a 5-D input of shape [n, c, x, y, z]
    if input.ndim != 5:
        raise NotImplementedError(
            'max_pool_3d requires an input [n, c, x, y, z]')

    # number of input dimensions
    vid_dim = input.ndim

    # shape of the [y, z] frames
    frame_shape = input.shape[-2:]

    # batch size:
    # multiply together the sizes of all dimensions except the frame dimensions
    batch_size = T.prod(input.shape[:-2])
    # http://deeplearning.net/software/theano/library/tensor/basic.html#theano.tensor.shape_padright
    batch_size = T.shape_padright(batch_size, 1)

    new_shape = T.cast(T.join(0, batch_size, T.as_tensor([1, ]), frame_shape),
                       'int32')
    input_4D = T.reshape(input, new_shape, ndim=4)

    op = DownsampleFactorMax((ds[1], ds[2]), ignore_border)
    output = op(input_4D)
    outshape = T.join(0, input.shape[:-2], output.shape[-2:])
    out = T.reshape(output, outshape, ndim=input.ndim)

    shufl = (list(range(vid_dim - 3)) + [vid_dim - 2] + [vid_dim - 1] +
             [vid_dim - 3])
    input_time = out.dimshuffle(shufl)
    vid_shape = input_time.shape[-2:]

    batch_size = T.prod(input_time.shape[:-2])
    batch_size = T.shape_padright(batch_size, 1)

    new_shape = T.cast(T.join(0, batch_size, T.as_tensor([1, ]), vid_shape),
                       'int32')
    input_4D_time = T.reshape(input_time, new_shape, ndim=4)
    op = DownsampleFactorMax((1, ds[0]), ignore_border)
    outtime = op(input_4D_time)
    outshape = T.join(0, input_time.shape[:-2], outtime.shape[-2:])
    shufl = (list(range(vid_dim - 3)) + [vid_dim - 1] + [vid_dim - 3] +
             [vid_dim - 2])
    return T.reshape(outtime, outshape, ndim=input.ndim).dimshuffle(shufl)
def spatial_pyramid(x, fn=max_pool, scales=None, **kwargs):
    """
    Performs pooling over various quadrants of the data and then aggregates
    the result.

    :param x: see max_pool
    :param fn: pointer to function having prototype `function(x, **kwargs)`
    :param scales: list of quadrants over which to perform pooling. e.g.
        scales=[1,2] will perform pooling over the entire sequence (jointly),
        then pool individually over the first and second half of the data.
        The return vector would then be of length 3.
    :param kwargs: arguments to pass to max_pool.
    """
    assert DIM_TIME == 0
    assert scales
    for scale in scales:
        assert isinstance(scale, int)

    def chunk_pool(idx, x, scale):
        assert idx.ndim == 0
        assert x.ndim == 3
        assert scale.ndim == 0
        rval = fn(x[idx: idx + x.shape[0] / scale], **kwargs)
        assert rval.ndim == 2
        return rval

    rval = T.shape_padleft(T.zeros_like(x[0]))
    for scale in scales:
        indices = T.arange(0, x.shape[0], x.shape[0] / scale)
        temp, updates = theano.scan(chunk_pool,
                                    sequences=[indices],
                                    outputs_info=[None],
                                    non_sequences=[x, T.constant(scale)])
        rval = T.join(0, rval, temp)

    return rval[1:]
def test_opt_gpujoin_joinvectors_elemwise_then_minusone():
    # from a bug in gpu normal sampling
    _a = numpy.asarray([1, 2, 3, 4], dtype='float32')
    _b = numpy.asarray([5, 6, 7, 8], dtype='float32')
    a = cuda.shared_constructor(_a)
    b = cuda.shared_constructor(_b)

    a_prime = tensor.cos(a)
    b_prime = tensor.sin(b)

    c = tensor.join(0, a_prime, b_prime)
    d = c[:-1]

    f = theano.function([], d, mode=mode_with_gpu)

    graph_nodes = f.maker.fgraph.toposort()
    assert isinstance(graph_nodes[-1].op, cuda.HostFromGpu)
    assert isinstance(graph_nodes[-2].op, cuda.GpuSubtensor)
    assert isinstance(graph_nodes[-3].op, cuda.GpuJoin)

    concat = numpy.concatenate([numpy.cos(_a), numpy.sin(_b)], axis=0)
    concat = concat[:-1]
    assert numpy.allclose(numpy.asarray(f()), concat)
def _lmul(self, x, T):
    if T:
        if len(self.col_shape()) > 1:
            x2 = x.flatten(2)
        else:
            x2 = x
        n_rows = x2.shape[0]
        offset = 0
        xWlist = []
        assert len(self._col_sizes) == len(self._Wlist)
        for size, W in zip(self._col_sizes, self._Wlist):
            # split the columns into pieces
            x_s = x2[:, offset:offset + size]
            # multiply each piece by one transform
            xWlist.append(
                W.lmul(x_s.reshape((n_rows,) + W.col_shape()), T))
            offset += size
        # sum the results
        rval = tensor.add(*xWlist)
    else:
        # multiply the input by each transform
        xWlist = [W.lmul(x, T).flatten(2) for W in self._Wlist]
        # join the results
        rval = tensor.join(1, *xWlist)
    return rval
def test_gpujoin_gpualloc():
    a = T.fmatrix("a")
    a_val = numpy.asarray(numpy.random.rand(4, 5), dtype="float32")
    b = T.fmatrix("b")
    b_val = numpy.asarray(numpy.random.rand(3, 5), dtype="float32")

    f = theano.function([a, b], T.join(0, T.zeros_like(a), T.ones_like(b)) + 4,
                        mode=mode_without_gpu)
    f_gpu = theano.function([a, b], T.join(0, T.zeros_like(a), T.ones_like(b)),
                            mode=mode_with_gpu)
    f_gpu2 = theano.function([a, b],
                             T.join(0, T.zeros_like(a), T.ones_like(b)) + 4,
                             mode=mode_with_gpu)

    assert sum([node.op == T.alloc for node in f.maker.fgraph.toposort()]) == 2
    assert sum([node.op == T.join for node in f.maker.fgraph.toposort()]) == 1
    assert sum([isinstance(node.op, GpuAlloc)
                for node in f_gpu.maker.fgraph.toposort()]) == 2
    assert sum([node.op == gpu_join
                for node in f_gpu.maker.fgraph.toposort()]) == 1
    assert sum([isinstance(node.op, GpuAlloc)
                for node in f_gpu2.maker.fgraph.toposort()]) == 2
    assert sum([node.op == gpu_join
                for node in f_gpu2.maker.fgraph.toposort()]) == 1
    assert numpy.allclose(f(a_val, b_val), f_gpu2(a_val, b_val))
def link(self, input):
    self.input = input

    # select the lines where we apply k-max pooling
    neighbors_for_pooling = TSN.images2neibs(
        ten4=self.input,
        neib_shape=(self.input.shape[2], 1),  # take the max along every column
        mode='valid'  # 'ignore_borders'
    )

    neighbors_arg_sorted = T.argsort(neighbors_for_pooling, axis=1)
    k_neighbors_arg = neighbors_arg_sorted[:, -self.k_max:]
    k_neighbors_arg_sorted = T.sort(k_neighbors_arg, axis=1)

    ii = T.repeat(T.arange(neighbors_for_pooling.shape[0]), self.k_max)
    jj = k_neighbors_arg_sorted.flatten()
    flattened_pooled_out = neighbors_for_pooling[ii, jj]

    pooled_out_pre_shape = T.join(
        0,
        self.input.shape[:-2],
        [self.input.shape[3]],
        [self.k_max]
    )
    self.output = flattened_pooled_out.reshape(
        pooled_out_pre_shape,
        ndim=self.input.ndim
    ).dimshuffle(0, 1, 3, 2)
    return self.output
def folding(self, curConv_out):
    # folding: sum adjacent feature-map rows pairwise, halving dimension 2
    matrix_shape = T.cast(T.join(0,
                                 T.as_tensor([T.prod(curConv_out.shape[:-1])]),
                                 T.as_tensor([curConv_out.shape[3]])),
                          'int64')
    matrix = T.reshape(curConv_out, matrix_shape, ndim=2)
    odd_matrix = matrix[0:matrix_shape[0]:2]
    even_matrix = matrix[1:matrix_shape[0]:2]
    raw_folded_matrix = odd_matrix + even_matrix

    out_shape = T.cast(T.join(0,
                              curConv_out.shape[:-2],
                              T.as_tensor([curConv_out.shape[2] / 2]),
                              T.as_tensor([curConv_out.shape[3]])),
                       'int64')
    fold_out = T.reshape(raw_folded_matrix, out_shape, ndim=4)
    return fold_out
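# Numeric check of the folding idea above (a numpy sketch; assumes an even
# number of rows in dimension 2): adjacent rows are summed pairwise, halving
# that dimension while leaving all other dimensions untouched.
import numpy

x = numpy.arange(2 * 1 * 4 * 3).reshape(2, 1, 4, 3)
folded = x[:, :, 0::2, :] + x[:, :, 1::2, :]
# folded.shape == (2, 1, 2, 3)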
def __init__(self, rng, x, topic_num=100):
    # input
    L2_input = sparse.csr_matrix("x", dtype=theano.config.floatX)

    # params
    vocab_size = x.shape[1]
    mu, sigma = x.data.mean(), x.data.var()**0.5
    rng = numpy.random.RandomState(numpy.random.randint(2**32 - 1)) \
        if rng is None else rng
    self.L2_w = theano.shared(
        numpy.asarray(
            rng.normal(loc=mu, scale=sigma, size=(vocab_size, topic_num)),
            dtype=theano.config.floatX
        ),
        borrow=True
    )
    self.L2_b = theano.shared(numpy.zeros(topic_num, dtype=theano.config.floatX),
                              borrow=True)
    self.params = [self.L2_w, self.L2_b]

    # stick-breaking: sticks -> orthogonal sticks
    L2_stick = sparse.dot(L2_input, self.L2_w) + self.L2_b - \
        0.5*(L2_input.size/vocab_size*tensor.sum(self.L2_w**2, 0) + self.L2_b**2)
    zero_space = tensor.zeros((L2_input.shape[0], 1), dtype=theano.config.floatX)
    L2_orth_stick = tensor.join(1, L2_stick, zero_space) \
        - tensor.join(1, zero_space, tensor.cumsum(L2_stick, 1))
    Pasterik_orth_stick = tensor.log(1 + tensor.exp(L2_orth_stick))

    # training model definition
    Likelihood = tensor.mean(Pasterik_orth_stick)
    grads = theano.grad(Likelihood, self.params)  # gradient w.r.t. params
    eta = tensor.scalar("eta")
    updates = [(param, param + eta*grad)
               for param, grad in zip(self.params, grads)]
    self._fit = theano.function(
        inputs=[L2_input, eta],
        outputs=Likelihood,
        updates=updates
    )

    # predict model definition
    self._predict = theano.function(
        inputs=[L2_input],
        outputs=tensor.argmax(L2_stick, axis=-1)
    )
    self._codec = theano.function(
        inputs=[L2_input],
        outputs=L2_stick > 0
    )
def _k_max_pooling(input, kmax):
    pool = (input.dimshuffle(0, 2, 1, 3)
                 .flatten(ndim=3)
                 .dimshuffle(1, 0, 2)
                 .flatten(ndim=2)
                 .dimshuffle(1, 0))
    neighborsArgSorted = T.argsort(pool, axis=1)
    yy = T.sort(neighborsArgSorted[:, -kmax:], axis=1).flatten()
    xx = T.repeat(T.arange(neighborsArgSorted.shape[0]), kmax)
    pool_kmax = pool[xx, yy]
    pool_kmax_shape = T.join(0,
                             T.as_tensor([input.shape[0], input.shape[1],
                                          input.shape[3], kmax]))
    pooled_out = pool_kmax.reshape(pool_kmax_shape, ndim=4).dimshuffle(0, 1, 3, 2)
    return pooled_out
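# The selection logic behind k-max pooling above, in plain numpy (a sketch):
# keep the kmax largest values of each row while preserving their original
# left-to-right order, which is exactly what the argsort/sort pair does.
import numpy

row = numpy.array([1.0, 9.0, 2.0, 7.0, 3.0])
kmax = 2
keep = numpy.sort(numpy.argsort(row)[-kmax:])  # indices of the 2 largest, in order
# row[keep] == array([9., 7.])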
def symb_forward(self, symb_input):
    """3d max pooling taken from github.com/lpigou/Theano-3D-ConvNet/
    (with modified shuffling)"""
    if symb_input.ndim < 5:
        raise NotImplementedError('max pooling 3D requires a dimension >= 5')

    height_width_shape = symb_input.shape[-2:]

    batch_size = _T.prod(symb_input.shape[:-2])
    batch_size = _T.shape_padright(batch_size, 1)

    new_shape = _T.cast(_T.join(0, batch_size, _T.as_tensor([1, ]),
                                height_width_shape), 'int32')
    input_4d = _T.reshape(symb_input, new_shape, ndim=4)

    # downsample height and width first
    # other dimensions contribute to batch_size
    op = _T.signal.downsample.DownsampleFactorMax((self.k_h, self.k_w),
                                                  self.ignore_border,
                                                  st=(self.d_h, self.d_w))
    output = op(input_4d)

    outshape = _T.join(0, symb_input.shape[:-2], output.shape[-2:])
    out = _T.reshape(output, outshape, ndim=symb_input.ndim)

    vol_dim = symb_input.ndim
    shufl = (list(range(vol_dim - 4)) +
             [vol_dim - 2] + [vol_dim - 1] + [vol_dim - 3] + [vol_dim - 4])
    input_depth = out.dimshuffle(shufl)
    vol_shape = input_depth.shape[-2:]

    batch_size = _T.prod(input_depth.shape[:-2])
    batch_size = _T.shape_padright(batch_size, 1)

    new_shape = _T.cast(_T.join(0, batch_size, _T.as_tensor([1, ]), vol_shape),
                        'int32')
    input_4D_depth = _T.reshape(input_depth, new_shape, ndim=4)

    # downsample depth
    # other dimensions contribute to batch_size
    op = _T.signal.downsample.DownsampleFactorMax((1, self.k_d),
                                                  self.ignore_border,
                                                  st=(1, self.d_d))
    outdepth = op(input_4D_depth)

    outshape = _T.join(0, input_depth.shape[:-2], outdepth.shape[-2:])
    shufl = (list(range(vol_dim - 4)) +
             [vol_dim - 1] + [vol_dim - 2] + [vol_dim - 4] + [vol_dim - 3])
    return _T.reshape(outdepth, outshape, ndim=symb_input.ndim).dimshuffle(shufl)
def get_t_weights(self, t):
    """
    Generate vector of weights allowing selection of current timestep.
    (if t is not an integer, the weights will linearly interpolate)
    """
    n_seg = self.trajectory_length
    t_compare = T.arange(n_seg, dtype=theano.config.floatX).reshape((1, n_seg))
    diff = abs(T.addbroadcast(t, 1) - T.addbroadcast(t_compare, 0))
    t_weights = T.max(T.join(1, (-diff + 1).reshape((n_seg, 1)),
                             T.zeros((n_seg, 1))), axis=1)
    return t_weights.reshape((-1, 1))
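# Worked example for get_t_weights above (a numpy sketch): with
# trajectory_length = 4 and t = 1.3, the hat function max(1 - |t - i|, 0)
# linearly interpolates between timesteps 1 and 2.
import numpy

n_seg, t = 4, 1.3
w = numpy.maximum(1.0 - numpy.abs(t - numpy.arange(n_seg)), 0.0)
# w == array([0. , 0.7, 0.3, 0. ]); the weights sum to 1 for 0 <= t <= n_seg - 1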
def multiple_l2_norm(tensors):
    """
    Get the L2 norm of multiple tensors.
    This function is taken from blocks.
    """
    flattened = [T.as_tensor_variable(t).flatten() for t in tensors]
    flattened = [(t if t.ndim > 0 else t.dimshuffle('x')) for t in flattened]
    joined = T.join(0, *flattened)
    return T.sqrt(T.sqr(joined).sum())
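# Usage sketch for multiple_l2_norm above (an illustration; assumes Theano and
# that T refers to theano.tensor as in the snippet): the joint norm over a
# weight matrix and a bias vector.
import theano
import theano.tensor as T

W = T.fmatrix('W')
b = T.fvector('b')
f = theano.function([W, b], multiple_l2_norm([W, b]))
# f(W_val, b_val) equals numpy.sqrt((W_val ** 2).sum() + (b_val ** 2).sum())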
def extract_contexts_targets(self, indices_matrix, sentLengths, leftPad):
    # first pad indices_matrix with zero indices on both sides
    left_padding = T.zeros((indices_matrix.shape[0], self.window),
                           dtype=theano.config.floatX)
    right_padding = T.zeros((indices_matrix.shape[0], self.window),
                            dtype=theano.config.floatX)
    matrix_padded = T.concatenate([left_padding, indices_matrix, right_padding],
                                  axis=1)
    leftPad = leftPad + self.window  # a vector plus a number

    # x, y indices
    max_length = T.max(sentLengths)
    x = T.repeat(T.arange(self.batch_size), max_length)
    y = []
    for row in range(self.batch_size):
        y.append(T.repeat((T.arange(leftPad[row], leftPad[row] + sentLengths[row]),),
                          max_length, axis=0).flatten()[:max_length])
    y = T.concatenate(y, axis=0)

    # construct xx, yy for the context matrix
    context_x = T.repeat(T.arange(self.batch_size),
                         max_length * self.context_size)
    #wenpeng=theano.printing.Print('context_x')(context_x)
    context_y = []
    for i in range(self.window, 0, -1):  # first consider the left window
        context_y.append(y - i)
    if not self.only_left_context:
        for i in range(self.window):  # then the right window
            context_y.append(y + i + 1)
    context_y_list = T.concatenate(context_y, axis=0)

    new_shape = T.cast(T.join(0,
                              T.as_tensor([self.context_size]),
                              T.as_tensor([self.batch_size * max_length])),
                       'int64')
    context_y_vector = T.reshape(context_y_list, new_shape,
                                 ndim=2).transpose().flatten()

    new_shape = T.cast(T.join(0,
                              T.as_tensor([self.batch_size]),
                              T.as_tensor([self.context_size * max_length])),
                       'int64')
    context_matrix = T.reshape(matrix_padded[context_x, context_y_vector],
                               new_shape, ndim=2)

    new_shape = T.cast(T.join(0,
                              T.as_tensor([self.batch_size]),
                              T.as_tensor([max_length])),
                       'int64')
    target_matrix = T.reshape(matrix_padded[x, y], new_shape, ndim=2)
    return T.cast(context_matrix, 'int64'), T.cast(target_matrix, 'int64')
def unpad_dims(output, input, leftdims, rightdims):
    """Reshapes the output after pad_dims.

    This reverts the padding by `pad_dims`.
    """
    if output.ndim == input.ndim:
        return output

    # restore the output to the original shape
    outshp = tensor.join(0, input.shape[:-rightdims], output.shape[-rightdims:])
    return GpuReshape(input.ndim)(output, outshp)
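# Round-trip shape sketch for pad_dims/unpad_dims above, in plain numpy
# (GpuReshape replaced by numpy reshape for illustration): with leftdims=2 and
# rightdims=2 on a 5-D input, the leading dimensions are packed, the last two
# axes pooled, and the original leading dimensions restored from input.shape.
import numpy

x = numpy.zeros((3, 5, 7, 9, 11))
packed = x.reshape((3 * 5, 7, 9, 11))                # pad_dims(x, 2, 2)
pooled = packed[:, :, ::2, ::2]                      # stand-in for a pooling op
restored = pooled.reshape(x.shape[:-2] + pooled.shape[-2:])  # unpad_dims
# restored.shape == (3, 5, 7, 5, 6)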
def flat_join(*args):
    # Reduce all inputs to vectors
    # (https://groups.google.com/forum/#!msg/theano-users/A-RcItll8eA/z8eZyrTwX9wJ)
    join_args = []
    for i, arg in enumerate(args):
        if arg.type.ndim:  # it is not a scalar
            join_args.append(arg.flatten())
        else:
            join_args.append(T.shape_padleft(arg))
    # join them into a vector
    return T.join(0, *join_args)
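# Usage sketch for flat_join above (an illustration; assumes Theano and
# T = theano.tensor): a scalar, a vector and a matrix are flattened and
# joined into a single 1-D vector.
import theano.tensor as T

s = T.fscalar('s')
v = T.fvector('v')
M = T.fmatrix('M')
out = flat_join(s, v, M)   # 1-D, length 1 + v.size + M.size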
def __init__(self, rng, inputVar, cfgParams, copyLayer=None, layerNum=None):
    """
    Allocate a LeNetConvPoolLayer with shared variable internal parameters.

    :type rng: numpy.random.RandomState
    :param rng: a random number generator used to initialize weights

    :type inputVar: theano.tensor.dtensor4
    :param inputVar: symbolic image tensor, of shape image_shape

    :type cfgParams: ConvPoolLayerParams
    """
    assert isinstance(cfgParams, ConvPoolLayerParams)

    floatX = theano.config.floatX  # @UndefinedVariable

    filter_shape = cfgParams.filter_shape
    image_shape = cfgParams.image_shape
    filter_stride = cfgParams.stride
    poolsize = cfgParams.poolsize
    poolType = cfgParams.poolType
    activation = cfgParams.activation
    inputDim = cfgParams.inputDim
    border_mode = cfgParams.border_mode

    self.cfgParams = cfgParams
    self.layerNum = layerNum

    assert image_shape[1] == filter_shape[1]
    self.inputVar = inputVar

    # there are "num inputVar feature maps * filter height * filter width"
    # inputs to each hidden unit
    fan_in = numpy.prod(filter_shape[1:])
    # each unit in the lower layer receives a gradient from:
    # "num output feature maps * filter height * filter width" / pooling size
    fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) /
               numpy.prod(poolsize) / numpy.prod(filter_stride))

    if not (copyLayer is None):
        self.W = copyLayer.W
    else:
        W_bound = 1. / (fan_in + fan_out)
        wInitVals = numpy.asarray(rng.uniform(low=-W_bound, high=W_bound,
                                              size=filter_shape), dtype=floatX)
        self.W = theano.shared(wInitVals, borrow=True,
                               name='convW{}'.format(layerNum))

    # the bias is a 1D tensor -- one bias per output feature map
    if not (copyLayer is None):
        self.b = copyLayer.b
    else:
        b_values = numpy.zeros((filter_shape[0],), dtype=floatX)
        self.b = theano.shared(value=b_values, borrow=True,
                               name='convB{}'.format(layerNum))

    if border_mode == 'same':
        # convolve inputVar feature maps with filters:
        # perform a full convolution and crop the output to input size
        conv_out = conv2d(input=inputVar,
                          filters=self.W,
                          filter_shape=filter_shape,
                          input_shape=image_shape,
                          subsample=filter_stride,
                          border_mode='full')
        offset_2 = filter_shape[2]//2
        offset_3 = filter_shape[3]//2
        conv_out = conv_out[:, :, offset_2:offset_2 + image_shape[2],
                            offset_3:offset_3 + image_shape[3]]
    else:
        conv_out = conv2d(input=inputVar,
                          filters=self.W,
                          filter_shape=filter_shape,
                          input_shape=image_shape,
                          subsample=filter_stride,
                          border_mode=border_mode)

    # downsample each feature map individually
    if poolType == 0:
        # using maxpooling
        if poolsize != (1, 1):
            pooled_out = pool_2d(input=conv_out, ds=poolsize,
                                 ignore_border=True)
        else:
            pooled_out = conv_out
    elif poolType == 1:
        # using average pooling
        pooled_out = theano.sandbox.neighbours.images2neibs(
            ten4=conv_out, neib_shape=poolsize,
            mode='ignore_borders').mean(axis=-1)
        new_shape = T.cast(T.join(0,
                                  conv_out.shape[:-2],
                                  T.as_tensor([conv_out.shape[2]//poolsize[0]]),
                                  T.as_tensor([conv_out.shape[3]//poolsize[1]])),
                           'int64')
        pooled_out = T.reshape(pooled_out, new_shape, ndim=4)
    elif poolType == -1:
        # no pooling at all
        pooled_out = conv_out

    # add the bias term. Since the bias is a vector (1D array), we first
    # reshape it to a tensor of shape (1, n_filters, 1, 1). Each bias will
    # thus be broadcasted across mini-batches and feature map width & height
    lin_output = pooled_out + self.b.dimshuffle('x', 0, 'x', 'x')

    self.output = (lin_output if activation is None
                   else activation(lin_output))
    self.output.name = 'output_layer_{}'.format(self.layerNum)

    # store parameters of this layer
    self.params = [self.W, self.b]
    self.weights = [self.W]
def dnc_step(
        s_x_, s_lstm_cell_, s_lstm_hid_,
        s_usage_, s_preced_, s_link_, s_mem_,
        s_read_val_, s_read_wgt_, s_write_wgt_):
    s_states_li_ = [
        s_lstm_cell_, s_lstm_hid_,
        s_usage_, s_preced_, s_link_, s_mem_,
        s_read_val_, s_read_wgt_, s_write_wgt_]

    s_inp = T.join(-1, s_x_, s_read_val_.flatten())
    s_lstm_cell_tp1, s_lstm_hid_tp1 = lyr.lyr_lstm(
        'ctrl', s_inp, s_lstm_cell_, s_lstm_hid_,
        ctrl_inp_size, ctrl_wm_size
    )
    s_out, s_itrface = T.split(
        lyr.lyr_linear(
            'ctrl_out', s_lstm_hid_tp1, ctrl_wm_size, ctrl_wm_size, bias_=None),
        [OUT_DIMS, itrface_size], 2, axis=-1)
    splits_len = [
        N_READS*CELL_SIZE, N_READS, CELL_SIZE, 1,
        CELL_SIZE, CELL_SIZE, N_READS, 1, 1, 3*N_READS
    ]
    s_keyr, s_strr, s_keyw, s_strw, \
        s_ers, s_write, s_freeg, s_allocg, s_writeg, s_rmode = \
        T.split(s_itrface, splits_len, 10, axis=-1)

    s_keyr = T.reshape(s_keyr, (CELL_SIZE, N_READS))
    s_strr = 1.+T.nnet.softplus(s_strr)
    s_strw = 1.+T.nnet.softplus(s_strw[0])
    s_ers = T.nnet.sigmoid(s_ers)
    s_freeg = T.nnet.sigmoid(s_freeg)
    s_allocg = T.nnet.sigmoid(s_allocg[0])
    s_writeg = T.nnet.sigmoid(s_writeg[0])
    s_rmode = T.nnet.softmax(T.reshape(s_rmode, (N_READS, 3))).dimshuffle(1, 0, 'x')

    s_mem_retention = T.prod(
        1.-s_freeg.dimshuffle(0, 'x')*s_read_wgt_, axis=0)

    s_usage_tp1 = s_mem_retention*(
        s_usage_+s_write_wgt_-s_usage_*s_write_wgt_)
    s_usage_order = T.argsort(s_usage_tp1)
    s_usage_order_inv = T.inverse_permutation(s_usage_order)

    s_usage_tp1_sorted = s_usage_tp1[s_usage_order]
    s_alloc_wgt = ((1.-s_usage_tp1_sorted)*(
        T.join(
            0, np.array([1.], dtype=th.config.floatX),
            op_cumprod_hack(s_usage_tp1_sorted[:-1])
        )))[s_usage_order_inv]

    s_content_wgt_w = T.nnet.softmax(
        s_strw*T.dot(s_mem_, s_keyw)/(
            T.sqrt(
                EPS+T.sum(T.sqr(s_mem_), axis=-1)*T.sum(T.sqr(s_keyw))))
    ).flatten()
    s_write_wgt_tp1 = s_writeg*(
        s_allocg*s_alloc_wgt+(1.-s_allocg)*s_content_wgt_w)
    s_mem_tp1 = s_mem_*(
        1.-T.outer(s_write_wgt_tp1, s_ers))+T.outer(s_write_wgt_tp1, s_write)

    s_preced_tp1 = (1.-T.sum(s_write_wgt_))*s_preced_ + s_write_wgt_tp1

    s_link_tp1 = (
        1.-s_write_wgt_tp1-s_write_wgt_tp1.dimshuffle(0, 'x')
    )*s_link_ + T.outer(s_write_wgt_tp1, s_preced_)
    s_link_tp1 = s_link_tp1 * (1.-T.identity_like(s_link_tp1))  #X

    s_fwd = T.dot(s_read_wgt_, s_link_tp1.transpose())  #X
    s_bwd = T.dot(s_read_wgt_, s_link_tp1)  #X

    s_content_wgt_r = T.nnet.softmax(T.dot(s_mem_tp1, s_keyr)/(T.sqrt(
        EPS+T.outer(
            T.sum(T.sqr(s_mem_tp1), axis=-1), T.sum(T.sqr(s_keyr), axis=0)
        )))).transpose()
    s_read_wgt_tp1 = s_bwd*s_rmode[0]+s_content_wgt_r*s_rmode[1]+s_fwd*s_rmode[2]
    s_read_val_tp1 = T.dot(s_read_wgt_tp1, s_mem_tp1)

    s_y = s_out + lyr.lyr_linear(
        'read_out',
        s_read_val_tp1.flatten(),
        CELL_SIZE*N_READS, OUT_DIMS,
        bias_=None)

    return [
        s_y, s_lstm_cell_tp1, s_lstm_hid_tp1,
        s_usage_tp1, s_preced_tp1, s_link_tp1, s_mem_tp1,
        s_read_val_tp1, s_read_wgt_tp1, s_write_wgt_tp1]
def jobman(_options, channel=None):
    ################### PARSE INPUT ARGUMENTS #######################
    o = parse_input_arguments(_options,
                              'RNN_theano/rnn_stream001/RNN_stream.ini')

    ####################### DEFINE THE TASK #########################
    mode = Mode(linker='cvm', optimizer='fast_run')
    rng = numpy.random.RandomState(o['seed'])
    train_set = spike_numbers(n_outs=o['n_outs'],
                              T=o['task_T'],
                              inrange=o['task_inrange'],
                              max_val=o['task_max_val'],
                              min_val=o['task_min_val'],
                              batches=o['task_train_batches'],
                              batch_size=o['task_train_batchsize'],
                              noise=o['task_noise'],
                              rng=rng)
    valid_set = spike_numbers(n_outs=o['n_outs'],
                              T=o['task_T'],
                              inrange=o['task_inrange'],
                              max_val=o['task_max_val'],
                              min_val=o['task_min_val'],
                              batches=o['task_valid_batches'],
                              batch_size=o['task_valid_batchsize'],
                              rng=rng)
    test_set = spike_numbers(n_outs=o['n_outs'],
                             T=o['task_T'],
                             inrange=o['task_inrange'],
                             max_val=o['task_max_val'],
                             min_val=o['task_min_val'],
                             batches=o['task_test_batches'],
                             batch_size=o['task_test_batchsize'],
                             rng=rng)
    if o['wout_pinv']:
        wout_set = spike_numbers(n_outs=o['n_outs'],
                                 T=o['task_T'],
                                 inrange=o['task_inrange'],
                                 max_val=o['task_max_val'],
                                 min_val=o['task_min_val'],
                                 batches=o['task_wout_batches'],
                                 batch_size=o['task_wout_batchsize'],
                                 noise=o['task_wout_noise'],
                                 rng=rng)

    ###################### DEFINE THE MODEL #########################
    def recurrent_fn(u_t, h_tm1, W_hh, W_ux, W_hy, b):
        x_t = TT.dot(W_ux, u_t)
        h_t = TT.tanh(TT.dot(W_hh, h_tm1) + x_t + b)
        y_t = TT.dot(W_hy, h_t)
        return h_t, y_t

    u = TT.tensor3('u')
    if o['error_over_all']:
        t = TT.tensor3('t')
    else:
        t = TT.matrix('t')
    h0 = TT.matrix('h0')
    b = shared_shape(
        floatX(
            numpy.random.uniform(size=(o['nhid'], ),
                                 low=-o['Wux_properties']['scale'],
                                 high=o['Wux_properties']['scale'])))
    alpha = TT.scalar('alpha')
    lr = TT.scalar('lr')
    W_hh = init(o['nhid'], o['nhid'], 'W_hh', o['Whh_style'],
                o['Whh_properties'], rng)
    W_ux = init(o['nhid'], train_set.n_ins, 'W_ux', o['Wux_style'],
                o['Wux_properties'], rng)
    W_hy = init(o['n_outs'], o['nhid'], 'W_hy', o['Why_style'],
                o['Why_properties'], rng)
    [h, y], _ = theano.scan(recurrent_fn,
                            sequences=u,
                            outputs_info=[h0, None],
                            non_sequences=[W_hh, W_ux, W_hy,
                                           TT.shape_padright(b)],
                            name='recurrent_fn',
                            mode=mode)
    init_h = h.owner.inputs[0].owner.inputs[2]
    #h = theano.printing.Print('h', attrs=('shape',))(h)
    if o['error_over_all']:
        out_err = TT.mean(TT.mean((y - t)**2, axis=0), axis=1)
        err = out_err.mean()
    else:
        out_err = ((y[-1] - t)**2).mean(axis=1)
        err = out_err.mean()

    # Regularization term
    if o['reg_projection'] == 'h[-1]':
        cost = h[-1].sum()
    elif o['reg_projection'] == 'err':
        cost = err
    elif o['reg_projection'] == 'random':
        trng = TT.shared_randomstreams.RandomStreams(rng.randint(1e6))
        proj = trng.uniform(size=h[-1].shape)
        if o['sum_h2'] > 0:
            proj = TT.join(0, proj[:o['sum_h2']],
                           TT.zeros_like(proj[o['sum_h2']:]))
        cost = TT.sum(proj * h[-1])

    z, gh = TT.grad(cost, [init_h, h])
    z.name = '__z__'
    z = z[:-1] - gh
    if o['sum_h'] > 0:
        z2 = TT.sum(z[:, :o['sum_h']]**2, axis=1)
    else:
        z2 = TT.sum(z**2, axis=1)
    v1 = z2[:-1]
    v2 = z2[1:]
    ## v2 = theano.printing.Print('v2')(v2)
    # floatX(1e-14)
    ratios = TT.switch(TT.ge(v2, 1e-12), TT.sqrt(v1 / v2), floatX(1))
    norm_0 = TT.ones_like(ratios[0])
    norm_t, _ = theano.scan(lambda x, y: x * y,
                            sequences=ratios,
                            outputs_info=norm_0,
                            name='jacobian_products',
                            mode=mode)
    norm_term = TT.sum(TT.mean(norm_t, axis=1))
    if o['reg_cost'] == 'product':
        r = TT.mean(abs(TT.log(norm_t)), axis=1).sum()
    elif o['reg_cost'] == 'each':
        r = TT.mean(abs(TT.log(ratios)), axis=1).sum()
    elif o['reg_cost'] == 'product2':
        ratios2 = TT.switch(TT.ge(z2[-1], 1e-12),
                            TT.sqrt(z2 / z2[-1]), floatX(1))
        r = TT.mean(abs(TT.log(ratios2)), axis=1).sum()
        ratios = TT.switch(TT.ge(v2, 1e-12),
                           TT.sqrt(v1 / v2), floatX(1e-12))[::-1]
        norm_0 = TT.ones_like(ratios[0])
        norm_t, _ = theano.scan(lambda x, y: x * y,
                                sequences=ratios,
                                outputs_info=norm_0,
                                name='jacobian_products',
                                mode=mode)
        norm_term = floatX(0.1) + TT.sum(TT.mean(norm_t, axis=1))
    gu = TT.grad(y[-1].sum(), u)

    if o['opt_alg'] == 'sgd':
        get_updates = lambda p, e, up: (
            sgd(p, e, lr=lr, scale=my1 / norm_term, updates=up)[0],
            [[], [], [TT.constant(0) for x in p]])
    elif o['opt_alg'] == 'sgd_qn':
        get_updates = lambda p, e, up: sgd_qn(p, e,
                                              mylambda=floatX(o['mylambda']),
                                              t0=floatX(o['t0']),
                                              skip=floatX(o['skip']),
                                              scale=my1 / norm_term,
                                              lazy=o['lazy'],
                                              updates=up)

    if o['win_reg']:
        updates, why_extra = get_updates([W_hy], err, {})
        cost = err + alpha * r
        updates, extras = get_updates([W_ux, W_hh, b], cost, updates)
        b_Why = why_extra[2][0]
        b_Wux = extras[2][0]
        b_Whh = extras[2][1]
        b_b = extras[2][2]
    else:
        updates, extras1 = get_updates([W_hy, W_ux], err, {})
        cost = err + alpha * r
        updates, extras2 = get_updates([W_hh, b], cost, updates)
        b_Why = extras1[2][0]
        b_Wux = extras1[2][1]
        b_Whh = extras2[2][0]
        b_b = extras2[2][1]

    nhid = o['nhid']
    train_batchsize = o['task_train_batchsize']
    valid_batchsize = o['task_valid_batchsize']
    test_batchsize = o['task_test_batchsize']
    wout_batchsize = o['task_wout_batchsize']
    train_h0 = shared_shape(floatX(numpy.zeros((nhid, train_batchsize))))
    valid_h0 = shared_shape(floatX(numpy.zeros((nhid, valid_batchsize))))
    test_h0 = shared_shape(floatX(numpy.zeros((nhid, test_batchsize))))
    wout_h0 = shared_shape(floatX(numpy.zeros((nhid, wout_batchsize))))
    idx = TT.iscalar('idx')
    train_u, train_t = train_set(idx)
    u.tag.shape = copy.copy(train_u.tag.shape)
    t.tag.shape = copy.copy(train_t.tag.shape)
    train = theano.function([u, t, lr, alpha], [out_err, r, norm_term],
                            updates=updates,
                            mode=mode,
                            givens={h0: train_h0})
    valid_u, valid_t = valid_set(idx)
    u.tag.shape = copy.copy(valid_u.tag.shape)
    t.tag.shape = copy.copy(valid_t.tag.shape)
    valid = theano.function([u, t], [out_err, r, norm_term],
                            mode=mode,
                            givens={h0: valid_h0})
    test_u, test_t = test_set(idx)
    u.tag.shape = copy.copy(test_u.tag.shape)
    t.tag.shape = copy.copy(test_t.tag.shape)
    test = theano.function([u, t],
                           [out_err, r, norm_term, W_hh, W_ux, W_hy, b, z, y,
                            h, u, gu, t, b_Whh, b_Wux, b_Why, b_b],
                           mode=mode,
                           givens={h0: test_h0})
    if o['wout_pinv']:
        wout_u, wout_t = wout_set.get_whole_tensors()

        def wiener_hopf_fn(u_t, t_t, H_tm1, Y_tm1, W_hh, W_ux, b, h0):
            def recurrent_fn(u_t, h_tm1, W_hh, W_ux, b):
                x_t = TT.dot(W_ux, u_t)
                h_t = TT.tanh(TT.dot(W_hh, h_tm1) + x_t + b)
                return h_t
            h_t, _ = theano.scan(recurrent_fn,
                                 sequences=u_t,
                                 outputs_info=h0,
                                 non_sequences=[W_hh, W_ux, b],
                                 name='recurrent_fn',
                                 mode=mode)
            H_t = H_tm1 + TT.dot(h_t[-1], h_t[-1].T)
            Y_t = Y_tm1 + TT.dot(h_t[-1], t_t.T)
            return H_t, Y_t

        H_0 = shared_shape(numpy.zeros((o['nhid'], o['nhid']),
                                       dtype=theano.config.floatX),
                           name='H0')
        Y_0 = shared_shape(numpy.zeros((o['nhid'], o['n_outs']),
                                       dtype=theano.config.floatX),
                           name='Y0')
        all_u = TT.tensor4('whole_u')
        all_t = TT.tensor3('whole_t')
        [H, Y], _ = theano.scan(wiener_hopf_fn,
                                sequences=[all_u, all_t],
                                outputs_info=[H_0, Y_0],
                                non_sequences=[W_hh, W_ux,
                                               TT.shape_padright(b), h0],
                                name='wiener_hopf_fn',
                                mode=mode)
        length = TT.cast(all_u.shape[0] * all_u.shape[3],
                         dtype=theano.config.floatX)
        H = H[-1] / length
        Y = Y[-1] / length
        H = H + floatX(o['wiener_lambda']) * TT.eye(o['nhid'])
        W_hy_solve = theano_linalg.solve(H, Y).T
        wout = theano.function([idx], [],
                               mode=mode,
                               updates={W_hy: W_hy_solve},
                               givens={all_u: wout_u,
                                       all_t: wout_t,
                                       h0: wout_h0})
    '''
    theano.printing.pydotprint(train, 'train.png', high_contrast=True,
                               with_ids=True)
    for idx, node in enumerate(train.maker.env.toposort()):
        if node.op.__class__.__name__ == 'Scan':
            theano.printing.pydotprint(node.op.fn,
                                       ('train%d_' % idx) + node.op.name,
                                       high_contrast=True, with_ids=True)
    theano.printing.pydotprint(train, 'valid.png', high_contrast=True,
                               with_ids=True)
    for idx, node in enumerate(train.maker.env.toposort()):
        if node.op.__class__.__name__ == 'Scan':
            theano.printing.pydotprint(node.op.fn,
                                       ('valid%d_' % idx) + node.op.name,
                                       high_contrast=True, with_ids=True)
    theano.printing.pydotprint(train, 'test.png', high_contrast=True,
                               with_ids=True)
    for idx, node in enumerate(train.maker.env.toposort()):
        if node.op.__class__.__name__ == 'Scan':
            theano.printing.pydotprint(node.op.fn,
                                       ('test%d_' % idx) + node.op.name,
                                       high_contrast=True, with_ids=True)
    if o['wout_pinv']:
        theano.printing.pydotprint(train, 'wout.png', high_contrast=True,
                                   with_ids=True)
        for idx, node in enumerate(train.maker.env.toposort()):
            if node.op.__class__.__name__ == 'Scan':
                theano.printing.pydotprint(node.op.fn,
                                           ('wout%d_' % idx) + node.op.name,
                                           high_contrast=True, with_ids=True)
    '''
    valid_set.refresh()
    #import GPUscan.ipdb; GPUscan.ipdb.set_trace()
    #rval = valid(valid_set.data_u[0], valid_set.data_t[0])

    #################### DEFINE THE MAIN LOOP #######################
    data = {}
    fix_len = o['max_storage_numpy']  #int(o['NN']/o['small_step'])
    avg_train_err = numpy.zeros((o['small_step'], o['n_outs']))
    avg_train_reg = numpy.zeros((o['small_step'], ))
    avg_train_norm = numpy.zeros((o['small_step'], ))
    avg_valid_err = numpy.zeros((o['small_step'], o['n_outs']))
    avg_valid_reg = numpy.zeros((o['small_step'], ))
    avg_valid_norm = numpy.zeros((o['small_step'], ))
    data['options'] = o
    data['train_err'] = -1 * numpy.ones((fix_len, o['n_outs']))
    data['valid_err'] = -1 * numpy.ones((fix_len, o['n_outs']))
    data['train_reg'] = -1 * numpy.ones((fix_len, ))
    data['valid_reg'] = -1 * numpy.ones((fix_len, ))
    data['train_norm'] = numpy.zeros((fix_len, ))
    data['valid_norm'] = numpy.zeros((fix_len, ))
    data['test_err'] = [None] * o['max_storage']
    data['test_idx'] = [None] * o['max_storage']
    data['test_reg'] = [None] * o['max_storage']
    data['test_norm'] = [None] * o['max_storage']
    data['y'] = [None] * o['max_storage']
    data['z'] = [None] * o['max_storage']
    data['t'] = [None] * o['max_storage']
    data['h'] = [None] * o['max_storage']
    data['u'] = [None] * o['max_storage']
    data['gu'] = [None] * o['max_storage']
    data['W_hh'] = [None] * o['max_storage']
    data['W_ux'] = [None] * o['max_storage']
    data['W_hy'] = [None] * o['max_storage']
    data['b'] = [None] * o['max_storage']
    data['b_ux'] = [None] * o['max_storage']
    data['b_hy'] = [None] * o['max_storage']
    data['b_hh'] = [None] * o['max_storage']
    data['b_b'] = [None] * o['max_storage']
    storage_exceeded = False
    stop = False
    old_rval = numpy.inf
    patience = o['patience']
    n_train = o['task_train_batches']
    n_valid = o['task_valid_batches']
    n_test = o['task_test_batches']
    n_test_runs = 0
    test_pos = 0
    valid_set.refresh()
    test_set.refresh()
    kdx = 0
    lr_v = floatX(o['lr'])
    alpha_v = floatX(o['alpha'])
    lr_f = 1
    if o['lr_scheme']:
        lr_f = o['lr_scheme'][1] / (o['NN'] - o['lr_scheme'][0])
    alpha_r = 1
    if o['alpha_scheme']:
        alpha_r = float(o['alpha_scheme'][1] - o['alpha_scheme'][0])
    st = time.time()
    if channel:
        try:
            channel.save()
        except:
            pass
    for idx in xrange(int(o['NN'])):
        if o['lr_scheme'] and idx > o['lr_scheme'][0]:
            lr_v = floatX(o['lr'] * 1. /
                          (1. + (idx - o['lr_scheme'][0]) * lr_f))
        if o['alpha_scheme']:
            if idx < o['alpha_scheme'][0]:
                alpha_v = floatX(0)
            elif idx < o['alpha_scheme'][1]:
                pos = 2. * (idx - o['alpha_scheme'][0]) / alpha_r - 1.
                alpha_v = floatX(numpy.exp(-pos**2 / 0.2) * o['alpha'])
            else:
                alpha_v = floatX(0)
        jdx = idx % o['small_step']
        avg_train_err[jdx, :] = 0
        avg_train_reg[jdx] = 0
        avg_train_norm[jdx] = 0
        avg_valid_err[jdx, :] = 0
        avg_valid_reg[jdx] = 0
        avg_valid_norm[jdx] = 0
        if o['wout_pinv'] and (idx % o['test_step'] == 0):
            wout_set.refresh()
            print('* Re-computing W_hy using closed-form '
                  'regularized wiener hopf formula')
            st_wout = time.time()
            wout(0)
            ed_wout = time.time()
            print '** It took ', ed_wout - st_wout, 'secs'
            print '** Average weight', abs(W_hy.get_value(borrow=True)).mean()
        print '*Re-generate training set '
        st_gen = time.time()
        train_set.refresh()
        print '**Generation took', time.time() - st_gen, 'secs'
        for k in xrange(o['task_train_batches']):
            rval = train(train_set.data_u[k], train_set.data_t[k],
                         lr_v, alpha_v)
            print '[', idx, '/', patience, '][', k, '/', n_train, '][train]', \
                rval[0].mean(), rval[1], rval[2], (1. / rval[2]) * lr_v, alpha_v
            avg_train_err[jdx, :] += rval[0]
            avg_train_reg[jdx] += rval[1]
            avg_train_norm[jdx] += rval[2]
        train_set.clean()
        print '**Epoch took', time.time() - st, 'secs'
        avg_train_err[jdx] /= n_train
        avg_train_reg[jdx] /= n_train
        avg_train_norm[jdx] /= n_train
        st = time.time()
        for k in xrange(n_valid):
            rval = valid(valid_set.data_u[k], valid_set.data_t[k])
            print '[', idx, '/', patience, '][', k, '/', n_valid, '][valid]', \
                rval[0].mean(), rval[1], rval[2]
            avg_valid_err[jdx] += rval[0]
            avg_valid_reg[jdx] += rval[1]
            avg_valid_norm[jdx] += rval[2]
        avg_valid_err[jdx] /= n_valid
        avg_valid_reg[jdx] /= n_valid
        avg_valid_norm[jdx] /= n_valid
        if idx >= o['small_step'] and idx % o['small_step'] == 0:
            kdx += 1
            if kdx >= o['max_storage_numpy']:
                kdx = o['max_storage_numpy'] // 3
                storage_exceeded = True
            data['steps'] = idx
            data['kdx'] = kdx
            data['storage_exceeded'] = storage_exceeded
            data['train_err'][kdx] = avg_train_err.mean()
            data['valid_err'][kdx] = avg_valid_err.mean()
            data['train_reg'][kdx] = avg_train_reg.mean()
            data['valid_reg'][kdx] = avg_valid_reg.mean()
            data['train_norm'][kdx] = avg_train_norm.mean()
            data['valid_norm'][kdx] = avg_valid_norm.mean()
            if channel:
                try:
                    _options['trainerr'] = data['train_err'][kdx].mean()
                    _options['maxtrainerr'] = data['train_err'][kdx].max()
                    _options['trainreg'] = data['train_reg'][kdx]
                    _options['trainnorm'] = data['train_norm'][kdx]
                    _options['validerr'] = data['valid_err'][kdx].mean()
                    _options['maxvaliderr'] = data['valid_err'][kdx].max()
                    _options['validreg'] = data['valid_reg'][kdx]
                    _options['validnorm'] = data['valid_norm'][kdx]
                    _options['steps'] = idx
                    _options['patience'] = patience
                    channel.save()
                except:
                    pass
            test_err = []
            test_reg = []
            test_norm = []
            for k in xrange(n_test):
                rval = test(test_set.data_u[k], test_set.data_t[k])
                print '[', idx, '][', k, '/', n_test, '][test]', \
                    rval[0].mean(), rval[1], rval[2]
                test_err += [rval[0]]
                test_reg += [rval[1]]
                test_norm += [rval[2]]
            test_z = rval[7][:, :, :10]
            test_y = rval[8][:, :, :10]
            test_h = rval[9][:, :, :10]
            test_u = rval[10][:, :, :10]
            test_gu = rval[11][:, :, :10]
            test_t = rval[12][:, :10]
            data['test_idx'][test_pos] = idx
            data['test_pos'] = test_pos
            data['y'][test_pos] = test_y
            data['z'][test_pos] = test_z
            data['t'][test_pos] = test_t
            data['h'][test_pos] = test_h
            data['u'][test_pos] = test_u
            data['gu'][test_pos] = test_gu
            data['test_err'][test_pos] = test_err
            data['test_reg'][test_pos] = test_reg
            data['test_norm'][test_pos] = test_norm
            data['W_hh'][test_pos] = rval[3]
            data['W_ux'][test_pos] = rval[4]
            data['W_hy'][test_pos] = rval[5]
            data['b'][test_pos] = rval[6]
            data['b_hh'][test_pos] = rval[13]
            data['b_ux'][test_pos] = rval[14]
            data['b_hy'][test_pos] = rval[15]
            data['b_b'][test_pos] = rval[16]
            cPickle.dump(data,
                         open(os.path.join(configs.results_folder(), o['path'],
                                           '%s_backup.pkl' % o['name']), 'wb'))
        print '** ', avg_valid_err[jdx].mean(), ' < ', old_rval, ' ? '
        if avg_valid_err[jdx].mean() < old_rval:
            patience += o['patience_incr']
            if avg_valid_err[jdx].mean() < old_rval * 0.997:
                test_err = []
                test_reg = []
                test_norm = []
                for k in xrange(n_test):
                    rval = test(test_set.data_u[k], test_set.data_t[k])
                    print '[', idx, '][', k, '/', n_test, '][test]', \
                        rval[0].mean(), rval[1], rval[2]
                    test_err += [rval[0]]
                    test_reg += [rval[1]]
                    test_norm += [rval[2]]
                test_z = rval[7][:, :, :10]
                test_y = rval[8][:, :, :10]
                test_h = rval[9][:, :, :10]
                test_u = rval[10][:, :, :10]
                test_gu = rval[11][:, :, :10]
                test_t = rval[12][:, :10]
                data['test_idx'][test_pos] = idx
                data['test_pos'] = test_pos
                data['y'][test_pos] = test_y
                data['z'][test_pos] = test_z
                data['t'][test_pos] = test_t
                data['h'][test_pos] = test_h
                data['u'][test_pos] = test_u
                data['gu'][test_pos] = test_gu
                data['test_err'][test_pos] = test_err
                data['test_reg'][test_pos] = test_reg
                data['test_norm'][test_pos] = test_norm
                data['W_hh'][test_pos] = rval[3]
                data['W_ux'][test_pos] = rval[4]
                data['W_hy'][test_pos] = rval[5]
                data['b'][test_pos] = rval[6]
                data['b_hh'][test_pos] = rval[13]
                data['b_ux'][test_pos] = rval[14]
                data['b_hy'][test_pos] = rval[15]
                data['b_b'][test_pos] = rval[16]
                cPickle.dump(data,
                             open(os.path.join(configs.results_folder(),
                                               o['path'],
                                               '%s.pkl' % o['name']), 'wb'))
                n_test_runs += 1
                test_pos += 1
                if test_pos >= o['max_storage']:
                    test_pos = test_pos - o['go_back']
                if numpy.mean(test_err) < 5e-5:
                    patience = idx - 5
                    break
        old_rval = avg_valid_err[jdx].mean()
        if idx > patience:
            break
# `pool_2d` lives in theano.tensor.signal.pool; `T.signal` is not reachable
# when only `theano.tensor` is imported as T, so import the module directly.
from theano.tensor.signal import pool


def max_pool_3d(input, ds, ignore_border=False):
    """
    Takes as input a N-D tensor, where N >= 3. It downscales the input video
    by the specified factor, by keeping only the maximum value of
    non-overlapping patches of size (ds[0],ds[1],ds[2]) (time, height, width)

    :type input: N-D theano tensor of input images.
    :param input: input images. Max pooling will be done over the 3 last
        dimensions.
    :type ds: tuple of length 3
    :param ds: factor by which to downscale. (2,2,2) will halve the video in
        each dimension.
    :param ignore_border: boolean value. When True, (5,5,5) input with
        ds=(2,2,2) will generate a (2,2,2) output. (3,3,3) otherwise.
    """
    if input.ndim < 3:
        raise NotImplementedError('max_pool_3d requires a dimension >= 3')

    # extract nr dimensions
    vid_dim = input.ndim
    # max pool in two different steps, so we can use the 2d implementation of
    # downsamplefactormax. First maxpool frames as usual.
    # Then maxpool the time dimension. Shift the time dimension to the third
    # position, so rows and cols are in the back

    # extract dimensions
    frame_shape = input.shape[-2:]

    # count the number of "leading" dimensions, store as dmatrix
    batch_size = T.prod(input.shape[:-2])
    batch_size = T.shape_padright(batch_size, 1)

    # store as 4D tensor with shape: (batch_size,1,height,width)
    new_shape = T.cast(T.join(0, batch_size, T.as_tensor([1, ]), frame_shape),
                       'int32')
    input_4D = T.reshape(input, new_shape, ndim=4)

    # downsample mini-batch of videos in rows and cols
    output = pool.pool_2d(input_4D, (ds[1], ds[2]), ignore_border)

    # restore to original shape
    outshape = T.join(0, input.shape[:-2], output.shape[-2:])
    out = T.reshape(output, outshape, ndim=input.ndim)

    # now maxpool time
    # output (time, rows, cols), reshape so that time is in the back
    shufl = (list(range(vid_dim - 3)) + [vid_dim - 2] + [vid_dim - 1] +
             [vid_dim - 3])
    input_time = out.dimshuffle(shufl)

    # reset dimensions
    vid_shape = input_time.shape[-2:]

    # count the number of "leading" dimensions, store as dmatrix
    batch_size = T.prod(input_time.shape[:-2])
    batch_size = T.shape_padright(batch_size, 1)

    # store as 4D tensor with shape: (batch_size,1,width,time)
    new_shape = T.cast(T.join(0, batch_size, T.as_tensor([1, ]), vid_shape),
                       'int32')
    input_4D_time = T.reshape(input_time, new_shape, ndim=4)

    # downsample mini-batch of videos in time
    outtime = pool.pool_2d(input_4D_time, (1, ds[0]), ignore_border)

    # restore to original shape (xxx, rows, cols, time)
    outshape = T.join(0, input_time.shape[:-2], outtime.shape[-2:])
    shufl = (list(range(vid_dim - 3)) + [vid_dim - 1] + [vid_dim - 3] +
             [vid_dim - 2])
    return T.reshape(outtime, outshape, ndim=input.ndim).dimshuffle(shufl)
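# --- Usage sketch (added; not part of the original sources) -----------------
# A minimal check of the max_pool_3d above, assuming it sits in an importable
# module together with the `theano`/`T` imports it relies on:
import numpy as np
import theano
import theano.tensor as T

# 5-D video batch: (batch, channel, time, height, width)
ftensor5 = T.TensorType('float32', (False,) * 5)
v = ftensor5('v')
pooled = max_pool_3d(v, ds=(2, 2, 2), ignore_border=True)
f = theano.function([v], pooled)

clip = np.random.rand(2, 3, 8, 16, 16).astype('float32')
print(f(clip).shape)  # -> (2, 3, 4, 8, 8)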
def __init__(self, numpy_rng=None, theano_rng=None, cfg=[],
             non_maximum_erasing=False, use_fast=False):
    self.conv_layers = []
    self.n_outs = cfg.n_outs
    self.layers = []
    self.extra_layers = []
    self.conv_layer_num = len(cfg.conv_layer_configs)
    self.dnn_layer_num = len(cfg.hidden_layers_sizes)
    self.extra_layers_sizes = cfg.extra_layers_sizes

    self.x = T.tensor4('x')
    self.extra_x = T.matrix('extra_x')

    for i in xrange(self.conv_layer_num):
        if i == 0:
            input = self.x
        else:
            input = self.conv_layers[-1].output
        config = cfg.conv_layer_configs[i]
        print config['filter_shape']
        conv_layer = ConvLayerForward(numpy_rng=numpy_rng, input=input,
                                      filter_shape=config['filter_shape'],
                                      poolsize=config['poolsize'],
                                      activation=config['activation'],
                                      flatten=config['flatten'],
                                      use_fast=use_fast)
        self.layers.append(conv_layer)
        self.conv_layers.append(conv_layer)
        self.conv_output_dim = (config['output_shape'][1] *
                                config['output_shape'][2] *
                                config['output_shape'][3])
        cfg.n_ins = (config['output_shape'][1] * config['output_shape'][2] *
                     config['output_shape'][3])
    print self.conv_output_dim
    print cfg.n_ins

    print 'Extra input dimension: ' + str(cfg.extra_dim)
    for i in xrange(len(self.extra_layers_sizes)):
        if i == 0:
            input_size = cfg.extra_dim
            layer_input = self.extra_x
        else:
            input_size = self.extra_layers_sizes[i - 1]
            layer_input = self.extra_layers[-1].output
        W = None
        b = None
        attend_layer = HiddenLayer(rng=numpy_rng, input=layer_input,
                                   n_in=input_size,
                                   n_out=self.extra_layers_sizes[i],
                                   W=W, b=b)
        self.extra_layers.append(attend_layer)
    self.extra_output = self.extra_layers[-1].output
    self.extra_output = T.nnet.softmax(self.extra_layers[-1].output)
    print 'layer num: ' + str(len(self.layers) - 1)

    for i in xrange(self.dnn_layer_num):
        if i == 0:
            # 1. Join two features (magnitude + phase)
            input_size = self.conv_output_dim + self.extra_layers_sizes[-1]
            layer_input = T.join(1, self.layers[-1].output, self.extra_output)
            # 2. Weighted Sum (magnitude * phase)
            # input_size = self.conv_output_dim
            # layer_input = self.layers[-1].output * self.extra_output
        else:
            input_size = cfg.hidden_layers_sizes[i - 1]
            layer_input = self.layers[-1].output
        W = None
        b = None
        hidden_layer = HiddenLayer(rng=numpy_rng, input=layer_input,
                                   n_in=input_size,
                                   n_out=cfg.hidden_layers_sizes[i],
                                   W=W, b=b)
        self.layers.append(hidden_layer)
        print 'layer num: ' + str(len(self.layers) - 1)

    logLayer = OutputLayer(input=self.layers[-1].output,
                           n_in=cfg.hidden_layers_sizes[-1],
                           n_out=self.n_outs)
    self.layers.append(logLayer)
    print 'layer num: ' + str(len(self.layers) - 1)
def __init__(self, numpy_rng=None, theano_rng=None, cfg=None,
             non_maximum_erasing=False, use_fast=False):
    self.n_outs = cfg.n_outs
    self.layers = []
    self.extra_layers = []
    self.conv_layer_num = len(cfg.conv_layer_configs)
    self.dnn_layer_num = len(cfg.hidden_layers_sizes)
    self.extra_layers_sizes = cfg.extra_layers_sizes

    self.x = T.tensor4('x')
    self.extra_x = T.matrix('extra_x')

    for i in xrange(self.conv_layer_num):
        if i == 0:
            input = self.x
        else:
            input = self.layers[-1].output
        config = cfg.conv_layer_configs[i]
        conv_layer = ConvLayerForward(numpy_rng=numpy_rng, input=input,
                                      filter_shape=config['filter_shape'],
                                      poolsize=config['poolsize'],
                                      activation=config['activation'],
                                      flatten=config['flatten'],
                                      use_fast=use_fast)
        self.layers.append(conv_layer)
        self.conv_output_dim = (config['output_shape'][1] *
                                config['output_shape'][2] *
                                config['output_shape'][3])
        cfg.n_ins = (config['output_shape'][1] * config['output_shape'][2] *
                     config['output_shape'][3])

    for i in xrange(len(self.extra_layers_sizes)):
        if i == 0:
            # input_size = 6400 * 5  # dead value, immediately overwritten
            input_size = cfg.extra_dim
            layer_input = self.extra_x
        else:
            input_size = self.extra_layers_sizes[i - 1]
            layer_input = self.extra_layers[-1].output
        W = None
        b = None
        attend_layer = HiddenLayer(rng=numpy_rng, input=layer_input,
                                   n_in=input_size,
                                   n_out=self.extra_layers_sizes[i],
                                   W=W, b=b)
        self.extra_layers.append(attend_layer)
    self.extra_layers[-1].att_e_tl = self.extra_layers[-1].output
    self.extra_layers[-1].att_a_tl = T.nnet.softmax(
        self.extra_layers[-1].att_e_tl)
    # self.extra_layers[-1].att_a_tl = T.exp(self.extra_layers[-1].att_e_tl) / (
    #     T.exp(self.extra_layers[-1].att_e_tl)).sum(0, keepdims=True)

    for i in xrange(self.dnn_layer_num):
        if i == 0:
            # input_size = self.conv_output_dim
            # layer_input = (self.extra_layers[-1].att_a_tl *
            #                self.layers[-1].output)
            input_size = self.conv_output_dim + self.extra_layers_sizes[-1]
            layer_input = T.join(1, self.extra_layers[-1].att_a_tl,
                                 self.layers[-1].output)
        else:
            input_size = cfg.hidden_layers_sizes[i - 1]
            layer_input = self.layers[-1].output
        W = None
        b = None
        hidden_layer = HiddenLayer(rng=numpy_rng, input=layer_input,
                                   n_in=input_size,
                                   n_out=cfg.hidden_layers_sizes[i],
                                   W=W, b=b)
        self.layers.append(hidden_layer)

    logLayer = OutputLayer(input=self.layers[-1].output,
                           n_in=cfg.hidden_layers_sizes[-1],
                           n_out=self.n_outs)
    self.layers.append(logLayer)
def lyr_gru_nogate(name_, s_x_, s_state_, idim_, sdim_, lyr_linear_, axis_=-1):
    # one linear map produces both the interpolation gate and the candidate
    # state; split the result into two (.., sdim_) halves along axis_
    s_interp_lin, s_state_tp1_lin = T.split(
        lyr_linear_(name_ + '_main', T.join(axis_, s_x_, s_state_),
                    idim_ + sdim_, sdim_ * 2),
        [sdim_] * 2, 2, axis_)
    s_interp = T.nnet.sigmoid(s_interp_lin)
    # convex combination of candidate state and previous state
    return T.tanh(s_state_tp1_lin) * s_interp + s_state_ * (1. - s_interp)
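# --- Usage sketch (added; not part of the original sources) -----------------
# A small sketch of how the cell above might be wired up. `lyr_linear` here is
# a hypothetical stand-in for the `lyr_linear_` factory the snippet expects:
# any callable mapping (name, x, in_dim, out_dim) to an affine transform works.
import numpy as np
import theano
import theano.tensor as T


def lyr_linear(name_, s_x_, idim_, odim_):
    # hypothetical affine layer: x W + b with freshly created parameters
    w = theano.shared(
        (np.random.randn(idim_, odim_) * 0.01).astype(theano.config.floatX),
        name=name_ + '_w')
    b = theano.shared(np.zeros(odim_, dtype=theano.config.floatX),
                      name=name_ + '_b')
    return T.dot(s_x_, w) + b


s_x = T.matrix('x')   # (batch, idim)
s_h = T.matrix('h')   # (batch, sdim)
s_h_tp1 = lyr_gru_nogate('gru0', s_x, s_h, 4, 8, lyr_linear, axis_=1)
step = theano.function([s_x, s_h], s_h_tp1)
print(step(np.ones((2, 4), dtype=theano.config.floatX),
           np.zeros((2, 8), dtype=theano.config.floatX)).shape)  # -> (2, 8)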
def test_kmax():
    nbatches, nkernels_in, nwords, ndim = 3, 1, 7, 2
    input_shape = (nbatches, nkernels_in, nwords, ndim)
    image_data = np.ones(input_shape, dtype=np.float64)
    image_data = np.random.rand(*input_shape)
    input = theano.shared(image_data)

    # sent_sizes_data = np.array([3, 2, 3, 2, 4, 5, 3])[:, np.newaxis].astype('int32')
    # sent_sizes = theano.shared(sent_sizes_data, borrow=True)
    # sent_sizes_matrix = T.repeat(sent_sizes, ndim, axis=1)
    # print 'sent_sizes_matrix', sent_sizes_matrix.eval()
    sent_sizes_data = np.random.randint(1, 5, size=(nbatches, 1))
    sent_sizes = theano.shared(sent_sizes_data, borrow=True)
    sent_sizes_matrix = T.repeat(sent_sizes, nwords, axis=1)
    print 'sent_sizes_matrix'
    print sent_sizes_matrix.eval()

    idx = T.arange(nwords).dimshuffle('x', 0)
    idx_matrix = T.repeat(idx, nbatches, axis=0)
    print 'idx_matrix'
    print idx_matrix.eval()

    sent_sizes_mask = T.lt(idx_matrix, sent_sizes_matrix)
    print 'sent_sizes_mask'
    print sent_sizes_mask.eval()

    k_max = 4
    # f_kmax = theano.function([input], kmax_pool(input, k))
    # k = theano.shared(k_max, name='k-max')
    # kmax_limit = nwords * T.ceil(L-l)/L
    # Unroll input into 2d ndim x (batch_size x nkernels_in x nwords)
    # pool = TSN.images2neibs(input, (input.shape[2], 1), mode='ignore_borders')
    print 'input', input.eval()

    neighborsArgSorted = T.argsort(input, axis=2)
    print 'neighborsArgSorted'
    print neighborsArgSorted.eval()

    neighborsArgSorted_masked = (neighborsArgSorted *
                                 sent_sizes_mask.dimshuffle(0, 'x', 1, 'x'))
    print 'neighborsArgSorted_masked'
    print neighborsArgSorted_masked.eval()

    neighborsArgSorted_clipped = (
        neighborsArgSorted *
        sent_sizes_mask.dimshuffle(0, 'x', 1, 'x'))[:, :, :k_max, :]
    print 'args'
    print neighborsArgSorted_clipped.eval()
    return

    # NOTE: everything below this `return` is unreachable scratch code, kept
    # from an earlier version of the test.
    # Given a column of sentence length
    # Tile it along axis=1 to form a matrix
    # Create another matrix with T.arange() to represent indices
    # do T.lt to create a mask and then eliminate all indices in the
    # neighborsArgSorted
    # yy = T.sort(neighborsArgSorted[:, -k:], axis=1).flatten()
    yy = T.sort(neighborsArgSorted_clipped, axis=3).flatten()
    print 'yy', yy.eval()
    xx = T.repeat(T.arange(neighborsArgSorted.shape[0]), k_max)
    pool_kmax = input[xx, yy]
    print pool_kmax.eval()
    # pool_kmax_shape = T.join(0, T.as_tensor([input.shape[0], input.shape[1],
    #                                          input.shape[3], k]))
    # pooled_out = pool_kmax.reshape(pool_kmax_shape, ndim=4).dimshuffle(0, 1, 3, 2)
    # (`kmax_limit` was undefined in the original; `k_max` is what was meant)
    pool_kmax_shape = T.join(
        0, T.as_tensor(
            [input.shape[0], input.shape[1], input.shape[3], k_max]))
    pooled_out = pool_kmax.reshape(pool_kmax_shape,
                                   ndim=4).dimshuffle(0, 1, 3, 2)
    # pooled_out = TSN.neibs2images(pool_kmax, (input_shape[2], 1),
    #                               input_shape, mode='valid')  # .dimshuffle(0, 1, 3, 2)
    # image_data = np.arange(np.prod(input_shape),
    #                        dtype=np.float64).reshape(input_shape)
    print image_data
    print 'kmax', k_max
def conv2d(input, filters, image_shape=None, filter_shape=None,
           border_mode='valid', subsample=(1, 1), **kargs):
    """
    signal.conv.conv2d performs a basic 2D convolution of the input with the
    given filters. The input parameter can be a single 2D image or a 3D
    tensor, containing a set of images. Similarly, filters can be a single 2D
    filter or a 3D tensor, corresponding to a set of 2D filters.

    Shape parameters are optional and will result in faster execution.

    Parameters
    ----------
    input : dmatrix or dtensor3
        Symbolic variable for images to be filtered.
    filters : dmatrix or dtensor3
        Symbolic variable containing filter values.
    border_mode : {'valid', 'full'}
        See scipy.signal.convolve2d.
    subsample
        Factor by which to subsample output.
    image_shape : tuple of length 2 or 3
        ([number images,] image height, image width).
    filter_shape : tuple of length 2 or 3
        ([number filters,] filter height, filter width).
    kwargs
        See theano.tensor.nnet.conv.conv2d.

    Returns
    -------
    symbolic 2D, 3D or 4D tensor
        Tensor of filtered images, with shape
        ([number images,] [number filters,] image height, image width).
    """
    assert input.ndim in (2, 3)
    assert filters.ndim in (2, 3)

    # use shape information if it is given to us
    if filter_shape and image_shape:
        if input.ndim == 3:
            bsize = image_shape[0]
        else:
            bsize = 1
        imshp = (1, ) + tuple(image_shape[-2:])

        if filters.ndim == 3:
            nkern = filter_shape[0]
        else:
            nkern = 1
        kshp = filter_shape[-2:]
    else:
        nkern, kshp = None, None
        bsize, imshp = None, None

    # reshape tensors to 4D, for compatibility with ConvOp
    if input.ndim == 3:
        sym_bsize = input.shape[0]
    else:
        sym_bsize = 1

    if filters.ndim == 3:
        sym_nkern = filters.shape[0]
    else:
        sym_nkern = 1

    new_input_shape = tensor.join(0, tensor.stack([sym_bsize, 1]),
                                  input.shape[-2:])
    input4D = tensor.reshape(input, new_input_shape, ndim=4)

    new_filter_shape = tensor.join(0, tensor.stack([sym_nkern, 1]),
                                   filters.shape[-2:])
    filters4D = tensor.reshape(filters, new_filter_shape, ndim=4)

    # perform actual convolution
    op = conv.ConvOp(output_mode=border_mode,
                     dx=subsample[0], dy=subsample[1],
                     imshp=imshp, kshp=kshp, nkern=nkern,
                     bsize=bsize, **kargs)
    output = op(input4D, filters4D)

    # flatten to 3D tensor if convolving with single filter or single image
    if input.ndim == 2 and filters.ndim == 2:
        if theano.config.warn.signal_conv2d_interface:
            warnings.warn(
                "theano.tensor.signal.conv2d() now outputs a 2d tensor when both"
                " inputs are 2d. To disable this warning, set the Theano flag"
                " warn.signal_conv2d_interface to False",
                stacklevel=3)
        output = tensor.flatten(output.T, outdim=2).T
    elif input.ndim == 2 or filters.ndim == 2:
        output = tensor.flatten(output.T, outdim=3).T

    return output
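# --- Usage sketch (added; not part of the original sources) -----------------
# A minimal check of the signal-style conv2d above, assuming its module-level
# imports (`theano`, `tensor`, `conv`, `warnings`) are in place. The names
# `img`/`kern` are illustrative only.
import numpy
import theano
from theano import tensor

img = tensor.dmatrix('img')
kern = tensor.dmatrix('kern')
out = conv2d(img, kern, border_mode='valid')
f = theano.function([img, kern], out)

image = numpy.random.rand(8, 8)
filt = numpy.ones((3, 3)) / 9.0   # simple box filter
print(f(image, filt).shape)       # -> (6, 6) for 'valid' mode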
def test_gpujoin_no_rebroadcast():
    _a = numpy.asarray([[1, 2], [3, 4]], dtype='float32')
    a = tcn.shared_constructor(_a)
    f = theano.function([], T.join(1, a))
    l = f.maker.env.toposort()
    assert not any([isinstance(x.op, T.Rebroadcast) for x in l])
def normal(
    self,
    size,
    avg=0.0,
    std=1.0,
    ndim=None,
    dtype=None,
    nstreams=None,
    truncate=False,
    **kwargs,
):
    """
    Sample a tensor of values from a normal distribution.

    Parameters
    ----------
    size : int_vector_like
        Array dimensions for the output tensor.
    avg : float_like, optional
        The mean value for the truncated normal to sample from (defaults
        to 0.0).
    std : float_like, optional
        The standard deviation for the truncated normal to sample from
        (defaults to 1.0).
    truncate : bool, optional
        Truncates the normal distribution at 2 standard deviations if
        True (defaults to False). When this flag is set, the standard
        deviation of the result will be less than the one specified.
    ndim : int, optional
        The number of dimensions for the output tensor (defaults to None).
        This argument is necessary if the size argument is ambiguous on the
        number of dimensions.
    dtype : str, optional
        The data-type for the output tensor. If not specified, the dtype is
        inferred from avg and std, but it is at least as precise as floatX.
    kwargs
        Other keyword arguments for random number generation (see uniform).

    Returns
    -------
    samples : TensorVariable
        A Theano tensor of samples randomly drawn from a normal
        distribution.
    """
    size = _check_size(size)
    avg = undefined_grad(as_tensor_variable(avg))
    std = undefined_grad(as_tensor_variable(std))

    if dtype is None:
        dtype = scal.upcast(config.floatX, avg.dtype, std.dtype)

    avg = tensor.cast(avg, dtype=dtype)
    std = tensor.cast(std, dtype=dtype)

    # generate even number of uniform samples
    # Do manual constant folding to lower optimizer work.
    if isinstance(size, theano.Constant):
        n_odd_samples = size.prod(dtype="int64")
    else:
        n_odd_samples = tensor.prod(size, dtype="int64")
    n_even_samples = n_odd_samples + n_odd_samples % 2
    uniform = self.uniform(
        (n_even_samples, ),
        low=0.0,
        high=1.0,
        ndim=1,
        dtype=dtype,
        nstreams=nstreams,
        **kwargs,
    )

    # box-muller transform
    u1 = uniform[:n_even_samples // 2]
    u2 = uniform[n_even_samples // 2:]
    r = tensor.sqrt(-2.0 * tensor.log(u1))
    theta = np.array(2.0 * np.pi, dtype=dtype) * u2
    cos_theta, sin_theta = tensor.cos(theta), tensor.sin(theta)
    z0 = r * cos_theta
    z1 = r * sin_theta

    if truncate:
        # use valid samples
        to_fix0 = (z0 < -2.0) | (z0 > 2.0)
        to_fix1 = (z1 < -2.0) | (z1 > 2.0)
        z0_valid = z0[tensor.nonzero(~to_fix0)]
        z1_valid = z1[tensor.nonzero(~to_fix1)]

        # re-sample invalid samples
        to_fix0 = tensor.nonzero(to_fix0)[0]
        to_fix1 = tensor.nonzero(to_fix1)[0]
        n_fix_samples = to_fix0.size + to_fix1.size
        lower = tensor.constant(1.0 / np.e**2, dtype=dtype)
        u_fix = self.uniform(
            (n_fix_samples, ),
            low=lower,
            high=1.0,
            ndim=1,
            dtype=dtype,
            nstreams=nstreams,
            **kwargs,
        )
        r_fix = tensor.sqrt(-2.0 * tensor.log(u_fix))
        z0_fixed = r_fix[:to_fix0.size] * cos_theta[to_fix0]
        z1_fixed = r_fix[to_fix0.size:] * sin_theta[to_fix1]

        # pack everything together to a useful result
        norm_samples = tensor.join(0, z0_valid, z0_fixed, z1_valid, z1_fixed)
    else:
        norm_samples = tensor.join(0, z0, z1)

    if isinstance(n_odd_samples, theano.Variable):
        samples = norm_samples[:n_odd_samples]
    elif n_odd_samples % 2 == 1:
        samples = norm_samples[:-1]
    else:
        samples = norm_samples

    samples = tensor.reshape(samples, newshape=size, ndim=ndim)
    samples *= std
    samples += avg

    return samples
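# --- Added illustration (not from the original sources) ---------------------
# The core of the sampler above is the Box-Muller transform; a quick numpy
# check of that identity, independent of Theano:
import numpy as np

rng = np.random.RandomState(0)
n = 100000
u1 = rng.uniform(size=n)
u2 = rng.uniform(size=n)

r = np.sqrt(-2.0 * np.log(u1))
z0 = r * np.cos(2.0 * np.pi * u2)
z1 = r * np.sin(2.0 * np.pi * u2)

z = np.concatenate([z0, z1])
print(z.mean(), z.std())           # close to 0.0 and 1.0
# Truncation as in the code above keeps only samples in [-2, 2]:
print((np.abs(z) <= 2.0).mean())   # ~0.954, the mass within 2 std devs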
def test_join(self):
    tv = numpy.asarray(self.rng.uniform(size=(10, )), theano.config.floatX)
    t = theano.shared(tv)
    out = tensor.join(0, self.x, t)
    self.check_rop_lop(out, (self.in_shape[0] + 10, ))
def pool_2d(input, ds, ignore_border=None, st=None, padding=(0, 0),
            mode='max'):
    """Downscale the input by a specified factor

    Takes as input a N-D tensor, where N >= 2. It downscales the input image
    by the specified factor, by keeping only the maximum value of
    non-overlapping patches of size (ds[0],ds[1])

    Parameters
    ----------
    input : N-D theano tensor of input images
        Input images. Max pooling will be done over the 2 last dimensions.
    ds : tuple of length 2
        Factor by which to downscale (vertical ds, horizontal ds).
        (2,2) will halve the image in each dimension.
    ignore_border : bool (default None, will print a warning and set to False)
        When True, (5,5) input with ds=(2,2) will generate a (2,2) output.
        (3,3) otherwise.
    st : tuple of two ints
        Stride size, which is the number of shifts over rows/cols to get the
        next pool region. If st is None, it is considered equal to ds
        (no overlap on pooling regions).
    padding : tuple of two ints
        (pad_h, pad_w), pad zeros to extend beyond four borders of the
        images, pad_h is the size of the top and bottom margins, and pad_w is
        the size of the left and right margins.
    mode : {'max', 'sum', 'average_inc_pad', 'average_exc_pad'}
        Operation executed on each window. `max` and `sum` always exclude the
        padding in the computation. `average` gives you the choice to include
        or exclude it.

    """
    if input.ndim < 2:
        raise NotImplementedError('pool_2d requires a dimension >= 2')
    if ignore_border is None:
        warnings.warn(
            "pool_2d() will have the parameter ignore_border"
            " default value changed to True (currently"
            " False). To have consistent behavior with all Theano"
            " version, explicitly add the parameter ignore_border=True."
            " On the GPU, using ignore_border=False is needed to use CuDNN."
            " When using ignore_border=False and not using CuDNN, the only"
            " GPU combination supported is when"
            " `ds == st and padding == (0, 0) and mode == 'max'`."
            " Otherwise, the convolution will be executed on CPU.",
            stacklevel=2)
        ignore_border = False
    if input.ndim == 4:
        op = Pool(ds, ignore_border, st=st, padding=padding, mode=mode)
        output = op(input)
        return output

    # extract image dimensions
    img_shape = input.shape[-2:]

    # count the number of "leading" dimensions, store as dmatrix
    batch_size = tensor.prod(input.shape[:-2])
    batch_size = tensor.shape_padright(batch_size, 1)

    # store as 4D tensor with shape: (batch_size,1,height,width)
    new_shape = tensor.cast(
        tensor.join(0, batch_size, tensor.as_tensor([1]), img_shape), 'int64')
    input_4D = tensor.reshape(input, new_shape, ndim=4)

    # downsample mini-batch of images
    op = Pool(ds, ignore_border, st=st, padding=padding, mode=mode)
    output = op(input_4D)

    # restore to original shape
    outshp = tensor.join(0, input.shape[:-2], output.shape[-2:])
    return tensor.reshape(output, outshp, ndim=input.ndim)
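# --- Usage sketch (added; not part of the original sources) -----------------
# Assuming the surrounding module exposes pool_2d as defined above together
# with its `Pool`/`tensor`/`warnings` imports, a short usage example:
import numpy
import theano
from theano import tensor

x = tensor.tensor3('x')   # (channels, height, width)
y = pool_2d(x, ds=(2, 2), ignore_border=True, mode='max')
f = theano.function([x], y)

img = numpy.random.rand(3, 6, 6).astype(theano.config.floatX)
print(f(img).shape)       # -> (3, 3, 3)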
def conv2d(input, filters, image_shape=None, filter_shape=None,
           border_mode='valid', subsample=(1, 1), **kargs):
    """
    signal.conv.conv2d performs a basic 2D convolution of the input with the
    given filters. The input parameter can be a single 2D image or a 3D
    tensor, containing a set of images. Similarly, filters can be a single 2D
    filter or a 3D tensor, corresponding to a set of 2D filters.

    Shape parameters are optional and will result in faster execution.

    :type input: dmatrix or dtensor3
    :param input: symbolic variable for images to be filtered
    :type filters: dmatrix or dtensor3
    :param filters: symbolic variable containing filter values
    :param border_mode: 'valid' or 'full'. see scipy.signal.convolve2d
    :param subsample: factor by which to subsample output
    :type image_shape: tuple of length 2 or 3
    :param image_shape: ([number images,] image height, image width)
    :type filter_shape: tuple of length 2 or 3
    :param filter_shape: ([number filters,] filter height, filter width)
    :param kwargs: see theano.tensor.nnet.conv.conv2d
    :rtype: symbolic 2D, 3D or 4D tensor
    :return: tensor of filtered images, with shape
             ([number images,] [number filters,] image height, image width)
    """
    assert input.ndim in (2, 3)
    assert filters.ndim in (2, 3)

    # use shape information if it is given to us
    if filter_shape and image_shape:
        if input.ndim == 3:
            bsize = image_shape[0]
        else:
            bsize = 1
        imshp = (1,) + tuple(image_shape[-2:])

        if filters.ndim == 3:
            nkern = filter_shape[0]
        else:
            nkern = 1
        kshp = filter_shape[-2:]
    else:
        nkern, kshp = None, None
        bsize, imshp = None, None

    # reshape tensors to 4D, for compatibility with ConvOp
    if input.ndim == 3:
        sym_bsize = input.shape[0]
    else:
        sym_bsize = 1

    if filters.ndim == 3:
        sym_nkern = filters.shape[0]
    else:
        sym_nkern = 1

    new_input_shape = tensor.join(0, tensor.stack(sym_bsize, 1),
                                  input.shape[-2:])
    input4D = tensor.reshape(input, new_input_shape, ndim=4)

    new_filter_shape = tensor.join(0, tensor.stack(sym_nkern, 1),
                                   filters.shape[-2:])
    filters4D = tensor.reshape(filters, new_filter_shape, ndim=4)

    # perform actual convolution
    op = conv.ConvOp(output_mode=border_mode,
                     dx=subsample[0], dy=subsample[1],
                     imshp=imshp, kshp=kshp, nkern=nkern,
                     bsize=bsize, **kargs)
    output = op(input4D, filters4D)

    # flatten to 3D tensor if convolving with single filter or single image
    if input.ndim == 2 or filters.ndim == 2:
        output = tensor.flatten(output.T, outdim=3).T

    return output
def max_pool_2d(input, ds, ignore_border=False, st=None, padding=(0, 0),
                mode='max'):
    """
    Takes as input a N-D tensor, where N >= 2. It downscales the input image
    by the specified factor, by keeping only the maximum value of
    non-overlapping patches of size (ds[0],ds[1])

    :type input: N-D theano tensor of input images.
    :param input: input images. Max pooling will be done over the 2 last
        dimensions.
    :type ds: tuple of length 2
    :param ds: factor by which to downscale (vertical ds, horizontal ds).
        (2,2) will halve the image in each dimension.
    :type ignore_border: bool
    :param ignore_border: When True, (5,5) input with ds=(2,2)
        will generate a (2,2) output. (3,3) otherwise.
    :type st: tuple of length 2
    :param st: stride size, which is the number of shifts
        over rows/cols to get the next pool region.
        if st is None, it is considered equal to ds
        (no overlap on pooling regions)
    :param padding: (pad_h, pad_w), pad zeros to extend beyond four borders
        of the images, pad_h is the size of the top and bottom margins,
        and pad_w is the size of the left and right margins.
    :type padding: tuple of two ints
    :param mode: 'max', 'sum', 'average_inc_pad' or 'average_exc_pad'.
        Operation executed on each window. `max` and `sum` always exclude
        the padding in the computation. `average` gives you the choice to
        include or exclude it.
    :type mode: string
    """
    if input.ndim < 2:
        raise NotImplementedError('max_pool_2d requires a dimension >= 2')
    if input.ndim == 4:
        op = DownsampleFactorMax(ds, ignore_border, st=st, padding=padding,
                                 mode=mode)
        output = op(input)
        return output

    # extract image dimensions
    img_shape = input.shape[-2:]

    # count the number of "leading" dimensions, store as dmatrix
    batch_size = tensor.prod(input.shape[:-2])
    batch_size = tensor.shape_padright(batch_size, 1)

    # store as 4D tensor with shape: (batch_size,1,height,width)
    new_shape = tensor.cast(
        tensor.join(0, batch_size, tensor.as_tensor([1]), img_shape), 'int64')
    input_4D = tensor.reshape(input, new_shape, ndim=4)

    # downsample mini-batch of images
    op = DownsampleFactorMax(ds, ignore_border, st=st, padding=padding,
                             mode=mode)
    output = op(input_4D)

    # restore to original shape
    outshp = tensor.join(0, input.shape[:-2], output.shape[-2:])
    return tensor.reshape(output, outshp, ndim=input.ndim)
@description: small test example
'''
import theano
import theano.tensor as tt

x = tt.matrix('x')
f = theano.function([x], (x ** 2).shape)
theano.printing.debugprint(f)
print("\n")

import numpy

x = tt.matrix('x')
y = tt.matrix('y')
z = tt.join(0, x, y)
xv = numpy.random.rand(5, 4)
yv = numpy.random.rand(3, 3)
f = theano.function([x, y], z.shape)
theano.printing.debugprint(f)
print("\n")

# The shape graph can be evaluated without computing the join itself, even
# though the column counts (4 vs 3) are incompatible for a real join; print
# the inferred shape rather than passing the numpy result to debugprint,
# which only accepts graphs and functions.
f1 = f(xv, yv)
print(f1)
print("\n")

f1 = theano.function([x, y], z)  # Do not take the shape.
theano.printing.debugprint(f1)
print("\n")

x = tt.matrix()
def max_pool_3d(input, ds, ignore_border=False):
    """
    Takes as input a N-D tensor, where N >= 3. It downscales the input by the
    specified factor, by keeping only the maximum value of non-overlapping
    patches of size (ds[0],ds[1],ds[2]) (depth, height, width)

    Arguments:
        input (N-D theano tensor of input images): input images. Max pooling
            will be done over the 3 last dimensions.
        ds (tuple of length 3): factor by which to downscale. (2,2,2) will
            halve the video in each dimension.
        ignore_border (boolean): When True, (5,5,5) input with ds=(2,2,2)
            will generate a (2,2,2) output. (3,3,3) otherwise.
    """
    if input.ndim < 3:
        raise NotImplementedError('max_pool_3d requires a dimension >= 3')

    # extract nr dimensions
    vid_dim = input.ndim
    # max pool in two different steps, so we can use the 2d implementation of
    # downsamplefactormax. First maxpool frames as usual.
    # Then maxpool the depth dimension. Shift the depth dimension to the
    # third position, so rows and cols are in the back

    # extract dimensions
    frame_shape = input.shape[-2:]

    # count the number of "leading" dimensions, store as dmatrix
    batch_size = tensor.prod(input.shape[:-2])
    batch_size = tensor.shape_padright(batch_size, 1)

    # store as 4D tensor with shape: (batch_size,1,height,width)
    new_shape = tensor.cast(
        tensor.join(0, batch_size, tensor.as_tensor([1, ]), frame_shape),
        'int32')
    input_4D = tensor.reshape(input, new_shape, ndim=4)

    # downsample mini-batch of videos in rows and cols
    op = DownsampleFactorMax((ds[1], ds[2]), ignore_border)
    output = op(input_4D)

    # restore to original shape
    outshape = tensor.join(0, input.shape[:-2], output.shape[-2:])
    out = tensor.reshape(output, outshape, ndim=input.ndim)

    # now maxpool depth
    # output (depth, rows, cols), reshape so that depth is in the back
    shufl = (list(range(vid_dim - 3)) + [vid_dim - 2] + [vid_dim - 1] +
             [vid_dim - 3])
    input_depth = out.dimshuffle(shufl)

    # reset dimensions
    vid_shape = input_depth.shape[-2:]

    # count the number of "leading" dimensions, store as dmatrix
    batch_size = tensor.prod(input_depth.shape[:-2])
    batch_size = tensor.shape_padright(batch_size, 1)

    # store as 4D tensor with shape: (batch_size,1,width,depth)
    new_shape = tensor.cast(
        tensor.join(0, batch_size, tensor.as_tensor([1, ]), vid_shape),
        'int32')
    input_4D_depth = tensor.reshape(input_depth, new_shape, ndim=4)

    # downsample mini-batch of videos in depth
    op = DownsampleFactorMax((1, ds[0]), ignore_border)
    outdepth = op(input_4D_depth)

    # restore to original shape (xxx, rows, cols, depth)
    outshape = tensor.join(0, input_depth.shape[:-2], outdepth.shape[-2:])
    shufl = (list(range(vid_dim - 3)) + [vid_dim - 1] + [vid_dim - 3] +
             [vid_dim - 2])
    return tensor.reshape(outdepth, outshape,
                          ndim=input.ndim).dimshuffle(shufl)
def __init__(self, numpy_rng, theano_rng=None, cfg=None, testing=False,
             input=None):
    self.layers = []
    self.extra_layers = []
    self.params = []
    self.delta_params = []

    self.n_ins = cfg.n_ins
    self.n_outs = cfg.n_outs
    self.conv_layers = []

    self.cfg = cfg
    self.conv_layer_configs = cfg.conv_layer_configs
    self.conv_activation = cfg.conv_activation
    self.use_fast = cfg.use_fast

    # 1.5 attention
    self.extra_x = T.matrix('extra_x')
    self.extra_dim = cfg.extra_dim
    print 'Extra input dimension: ' + str(cfg.extra_dim)
    self.extra_layers_sizes = cfg.extra_layers_sizes

    # 2. dnn
    self.hidden_layers_sizes = cfg.hidden_layers_sizes
    self.hidden_layers_number = len(self.hidden_layers_sizes)
    self.activation = cfg.activation

    if not theano_rng:
        theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
    if input == None:
        self.x = T.matrix('x')
    else:
        self.x = input
    self.y = T.matrix('y')

    #######################
    # build cnn layers    #
    #######################
    print '1. start to build cnn mag layer: ' + str(self.conv_layer_configs)
    self.conv_layer_num = len(self.conv_layer_configs)
    for i in xrange(self.conv_layer_num):
        if i == 0:
            input = self.x
        else:
            input = self.layers[-1].output
        config = self.conv_layer_configs[i]
        conv_layer = ConvLayer(numpy_rng=numpy_rng, input=input,
                               input_shape=config['input_shape'],
                               filter_shape=config['filter_shape'],
                               poolsize=config['poolsize'],
                               activation=self.conv_activation,
                               flatten=config['flatten'],
                               use_fast=self.use_fast, testing=testing)
        self.layers.append(conv_layer)
        self.conv_layers.append(conv_layer)
        self.params.extend(conv_layer.params)
        self.delta_params.extend(conv_layer.delta_params)
        self.conv_output_dim = (config['output_shape'][1] *
                                config['output_shape'][2] *
                                config['output_shape'][3])
        cfg.n_ins = (config['output_shape'][1] * config['output_shape'][2] *
                     config['output_shape'][3])

    #######################################
    # build phase-based attention layer   #
    #######################################
    # 0. phase-based attention
    print '2. start to build attend layer: ' + str(self.extra_layers_sizes)
    for i in xrange(len(self.extra_layers_sizes)):
        if i == 0:
            input_size = cfg.extra_dim
            layer_input = self.extra_x
        else:
            input_size = self.extra_layers_sizes[i - 1]
            layer_input = self.extra_layers[-1].output
        W = None
        b = None
        attend_layer = HiddenLayer(rng=numpy_rng, input=layer_input,
                                   n_in=input_size,
                                   n_out=self.extra_layers_sizes[i],
                                   W=W, b=b, activation=self.activation)
        print '\tbuild attend layer: ' + str(input_size) + ' x ' + str(
            attend_layer.n_out)
        self.extra_layers.append(attend_layer)
        self.params.extend(attend_layer.params)
        self.delta_params.extend(attend_layer.delta_params)
    self.extra_output = self.extra_layers[-1].output
    self.extra_output = T.nnet.softmax(self.extra_layers[-1].output)
    # self.extra_output_rand = numpy.asarray(numpy_rng.uniform(
    #     low=-0.1,
    #     high=1.0,
    #     size=(32, 20)), dtype=theano.config.floatX)
    # self.extra_output = theano.shared(value=self.extra_output_rand,
    #                                   name='rand', borrow=True)
    print '2. finish attend layer softmax(0): ' + str(
        self.extra_layers[-1].n_out)

    #######################################
    # build dnnv                          #
    #######################################
    print '3. start to build dnnv layer: ' + str(self.hidden_layers_number)
    for i in xrange(self.hidden_layers_number):
        # construct the hidden layer
        if i == 0:
            # 1. Join two features (magnitude + phase)
            input_size = self.conv_output_dim + self.extra_layers_sizes[-1]
            layer_input = T.join(1, self.layers[-1].output, self.extra_output)
            # 2. Weighted Sum (magnitude * phase)
            # input_size = self.conv_output_dim
            # layer_input = self.layers[-1].output * self.extra_output
        else:
            input_size = self.hidden_layers_sizes[i - 1]
            layer_input = self.layers[-1].output
        W = None
        b = None
        hidden_layer = HiddenLayer(rng=numpy_rng, input=layer_input,
                                   n_in=input_size,
                                   n_out=self.hidden_layers_sizes[i],
                                   W=W, b=b, activation=self.activation)
        print '\tbuild dnnv layer: ' + str(input_size) + ' x ' + str(
            hidden_layer.n_out)
        # add the layer to our list of layers
        self.layers.append(hidden_layer)
        self.params.extend(hidden_layer.params)
        self.delta_params.extend(hidden_layer.delta_params)
    print '3. finish dnnv layer: ' + str(self.layers[-1].n_out)

    #######################################
    # build logistic regression layer     #
    #######################################
    print '4. start to build log layer: 1'
    # We now need to add a logistic layer on top of the MLP
    self.logLayer = OutputLayer(input=self.layers[-1].output,
                                n_in=self.hidden_layers_sizes[-1],
                                n_out=self.n_outs)
    print '\tbuild final layer: ' + str(self.layers[-1].n_out) + ' x ' + str(
        self.n_outs)
    self.layers.append(self.logLayer)
    self.params.extend(self.logLayer.params)
    self.delta_params.extend(self.logLayer.delta_params)
    print '4. finish log layer: ' + str(self.layers[-1].n_out)
    print 'Total layers: ' + str(len(self.layers))

    self.finetune_cost = self.logLayer.l2(self.y)
    self.errors = self.logLayer.errors(self.y)

    sys.stdout.flush()
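# --- Added illustration (not from the original sources) ---------------------
# The "join two features" branch above concatenates the convolutional output
# with the attention softmax along the feature axis. The same operation in
# isolation, with illustrative shapes:
import numpy
import theano
import theano.tensor as T

mag = T.matrix('mag')   # (batch, conv_output_dim)
att = T.matrix('att')   # (batch, extra_layers_sizes[-1])
joined = T.join(1, mag, att)   # (batch, conv_output_dim + extra dim)
f = theano.function([mag, att], joined)
print(f(numpy.ones((2, 3), dtype=theano.config.floatX),
        numpy.zeros((2, 4), dtype=theano.config.floatX)).shape)  # -> (2, 7)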
def normal(self, size, avg=0.0, std=1.0, ndim=None, dtype=None,
           nstreams=None):
    """
    :param size: Can be a list of integers or Theano variables (ex: the
        shape of another Theano Variable)
    :param dtype: The output data type. If dtype is not specified, it will
        be inferred from the dtype of low and high, but will be at least as
        precise as floatX.
    :param nstreams: Number of streams.
    """
    # We need an even number of ]0,1[ samples. Then we split them
    # in two halves. First half becomes our U1's for Box-Muller,
    # second half our U2's. See Wikipedia page:
    # http://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform
    avg = as_tensor_variable(avg)
    std = as_tensor_variable(std)

    if dtype is None:
        dtype = scal.upcast(config.floatX, avg.dtype, std.dtype)

    avg = cast(avg, dtype)
    std = cast(std, dtype)

    evened = False
    constant = False
    if isinstance(size, tuple) and all(
            [isinstance(i, (numpy.integer, int)) for i in size]):
        constant = True
        n_samples = numpy.prod(size)

        if n_samples % 2 == 1:
            n_samples += 1
            evened = True
    else:
        # if even, don't change, if odd, +1
        n_samples = prod(size) + (prod(size) % 2)

    flattened = self.uniform(size=(n_samples, ), dtype=dtype,
                             nstreams=nstreams)

    if constant:
        U1 = flattened[:n_samples // 2]
        U2 = flattened[n_samples // 2:]
    else:
        U1 = flattened[:prod(flattened.shape) // 2]
        U2 = flattened[prod(flattened.shape) // 2:]

    # normal_samples = zeros_like(flattened)
    sqrt_ln_U1 = sqrt(-2.0 * log(U1))
    # TypeError: 'TensorVariable' object does not support item assignment
    # so this doesn't work...
    # normal_samples[:n_samples/2] = sqrt_ln_U1 * cos(2.0*numpy.pi*U2)
    # normal_samples[n_samples/2:] = sqrt_ln_U1 * sin(2.0*numpy.pi*U2)

    # so trying this instead
    first_half = sqrt_ln_U1 * cos(
        numpy.array(2.0 * numpy.pi, dtype=dtype) * U2)
    second_half = sqrt_ln_U1 * sin(
        numpy.array(2.0 * numpy.pi, dtype=dtype) * U2)
    normal_samples = join(0, first_half, second_half)

    final_samples = None
    if evened:
        final_samples = normal_samples[:-1]
    elif constant:
        final_samples = normal_samples
    else:
        final_samples = normal_samples[:prod(size)]

    if size:
        final_samples = final_samples.reshape(size)

    final_samples = avg + std * final_samples

    assert final_samples.dtype == dtype
    return final_samples
def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2, 2),
             k=4):
    """
    Allocate a LeNetConvPoolLayer with shared variable internal parameters.

    :type rng: numpy.random.RandomState
    :param rng: a random number generator used to initialize weights

    :type input: theano.tensor.dtensor4
    :param input: symbolic image tensor, of shape image_shape

    :type filter_shape: tuple or list of length 4
    :param filter_shape: (number of filters, num input feature maps,
                          filter height, filter width)

    :type image_shape: tuple or list of length 4
    :param image_shape: (batch size, num input feature maps,
                         image height, image width)

    :type poolsize: tuple or list of length 2
    :param poolsize: the downsampling (pooling) factor (#rows, #cols)

    :type k: int
    :param k: number of largest activations kept per row by k-max pooling
    """
    assert image_shape[1] == filter_shape[1]
    self.input = input

    # there are "num input feature maps * filter height * filter width"
    # inputs to each hidden unit
    fan_in = numpy.prod(filter_shape[1:])
    # each unit in the lower layer receives a gradient from:
    # "num output feature maps * filter height * filter width" /
    # pooling size
    fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) /
               numpy.prod(poolsize))
    # initialize weights with random weights
    W_bound = numpy.sqrt(6. / (fan_in + fan_out))
    self.W = theano.shared(numpy.asarray(
        rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
        dtype=theano.config.floatX), borrow=True)

    # the bias is a 1D tensor -- one bias per output feature map
    b_values = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX)
    self.b = theano.shared(value=b_values, borrow=True)

    # convolve input feature maps with filters
    conv_out = conv.conv2d(input=input, filters=self.W,
                           filter_shape=filter_shape,
                           image_shape=image_shape)

    # images2neibs produces a 2D matrix
    neighborsForPooling = TSN.images2neibs(ten4=conv_out,
                                           neib_shape=(1, conv_out.shape[3]),
                                           mode='ignore_borders')

    # k = poolsize[1]
    neighborsArgSorted = T.argsort(neighborsForPooling, axis=1)
    kNeighborsArg = neighborsArgSorted[:, -k:]
    kNeighborsArgSorted = T.sort(kNeighborsArg, axis=1)
    ii = T.repeat(T.arange(neighborsForPooling.shape[0]), k)
    jj = kNeighborsArgSorted.flatten()
    pooledkmaxTmp = neighborsForPooling[ii, jj]

    # reshape pooledkmaxTmp
    new_shape = T.cast(T.join(0, conv_out.shape[:-2],
                              T.as_tensor([conv_out.shape[2]]),
                              T.as_tensor([k])),
                       'int64')
    pooled_out = T.reshape(pooledkmaxTmp, new_shape, ndim=4)

    # downsample each feature map individually, using maxpooling
    '''
    pooled_out = downsample.max_pool_2d(input=conv_out,
                                        ds=poolsize,
                                        ignore_border=True)
    '''

    # add the bias term. Since the bias is a vector (1D array), we first
    # reshape it to a tensor of shape (1,n_filters,1,1). Each bias will
    # thus be broadcasted across mini-batches and feature map
    # width & height
    self.output = T.tanh(pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))

    # store parameters of this layer
    self.params = [self.W, self.b]
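# --- Added illustration (not from the original sources) ---------------------
# The index gymnastics in the pooling code above (argsort, keep the last k
# columns, re-sort so the survivors stay in their original order) can be
# checked on a plain matrix; a minimal sketch:
import numpy
import theano
import theano.tensor as T

m = T.matrix('m')
k = 3
args = T.argsort(m, axis=1)[:, -k:]   # column indices of the k largest values
args = T.sort(args, axis=1)           # restore left-to-right order
ii = T.repeat(T.arange(m.shape[0]), k)
jj = args.flatten()
kmax = m[ii, jj].reshape((m.shape[0], k))

f = theano.function([m], kmax)
data = numpy.array([[0., 3., 1., 2.],
                    [5., 4., 6., 7.]], dtype=theano.config.floatX)
print(f(data))   # [[3. 1. 2.], [5. 6. 7.]]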