def test_chunk_unchunk_grad2():
    """Chain the UnChunking and Chunking op gradients by hand.

    The ``grad`` methods of the two native ops are called directly (not via
    ``T.grad``); the gradient that reaches the original input is expected to
    equal the gradient that was fed in for the unchunked output.
    """
    dims = (101, 3, 5)  # (n_time, n_batch, n_dim)
    chunking = dict(chunk_size=11, chunk_step=7)

    numpy.random.seed(1234)
    # Draw order (input first, output gradient second) is kept fixed so the
    # seeded values stay reproducible.
    x_np = numpy.random.randn(*dims).astype(f32)
    grad_x2_np = numpy.random.randn(*dims).astype(f32)
    mask_np = numpy.ones(dims[:2], dtype="int8")

    x = T.as_tensor(x_np)
    grad_x2 = T.as_tensor(grad_x2_np)
    mask = T.as_tensor(mask_np)

    chunked, chunked_mask = chunk(x, index=mask, **chunking)
    chunk_op = NativeOp.Chunking().make_op()
    assert type(chunked.owner.op) is type(chunk_op)

    unchunked, _mask2, _factors = unchunk(
        chunked, index=chunked_mask,
        n_time=x.shape[0], n_batch=x.shape[1], **chunking)
    unchunk_op = NativeOp.UnChunking().make_op()
    assert type(unchunked.owner.op) is type(unchunk_op)

    # Only the first gradient of each op is needed; the explicit unpacking
    # also pins how many gradients each op returns.
    grad_chunked, _, _, _, _, _ = unchunk_op.grad(
        unchunked.owner.inputs, (grad_x2, None, None))
    grad_x, _, _, _, _ = chunk_op.grad(
        chunked.owner.inputs, (grad_chunked, None))

    assert_almost_equal(grad_x.eval(), grad_x2_np)
def _initial_part_matrix(self, part, size, deterministic):
    """Build the initial matrix for ``part`` with ``size`` rows.

    ``self._choose_alternative`` selects the local or the global triple
    ``(length, distribution name, deterministic value)`` for ``part``.

    :param part: forwarded to ``self._choose_alternative``.
    :param size: number of rows; ``None`` is treated as 1.
    :param deterministic: Python truth value or a symbolic variable. When
        true, the matrix is filled with the deterministic value instead of
        being sampled from ``self._rng``.
    :return: symbolic matrix of shape ``(size, length)``.
    """
    n_rows = 1 if size is None else size
    length, dist_name, dist_map = self._choose_alternative(
        part,
        (self.local_size,
         self.initial_dist_local_name,
         self.initial_dist_local_map),
        (self.global_size,
         self.initial_dist_global_name,
         self.initial_dist_global_map),
    )
    dtype = self.symbolic_initial_global_matrix.dtype
    if length == 0:
        # in this case theano fails to compute sample of correct size
        return tt.ones((n_rows, 0), dtype)

    shape = tt.stack((tt.as_tensor(n_rows), tt.as_tensor(length)))

    def draw():
        # Random sample from the distribution named ``dist_name``.
        return getattr(self._rng, dist_name)(shape)

    def fixed():
        # Constant matrix holding the deterministic value.
        return tt.ones(shape, dtype) * dist_map

    if isinstance(deterministic, tt.Variable):
        # The flag is only known at run time: build both and switch.
        sample = draw()
        return tt.switch(deterministic, fixed(), sample)

    # The flag is a plain Python value: build only the branch that is needed.
    return fixed() if deterministic else draw()
def dynamic_kmaxPooling(self, curConv_out, k):
    """k-max pool each row of a 4D tensor and zero-pad to ``self.unifiedWidth``.

    For every row along the last axis the ``k`` largest entries are kept in
    their original left-to-right order, then ``self.unifiedWidth - k`` zeros
    are appended so that the result has a fixed width.

    Side effect: the 2D neighbourhood matrix is stored in ``self.neighbors``.

    :param curConv_out: 4D symbolic tensor.
    :param k: number of values to keep per row.
    :return: 4D tensor whose last axis has length ``self.unifiedWidth``.
    """
    # One neighbourhood per full row: a 2D matrix with one row per (.., .., h).
    rows = TSN.images2neibs(ten4=curConv_out,
                            neib_shape=(1, curConv_out.shape[3]),
                            mode='ignore_borders')
    self.neighbors = rows
    n_rows = rows.shape[0]

    # The last k argsort columns are the positions of the k largest values;
    # sorting those positions restores the original ordering.
    top_k_cols = T.sort(T.argsort(rows, axis=1)[:, -k:], axis=1)
    row_idx = T.repeat(T.arange(n_rows), k)
    col_idx = top_k_cols.flatten()
    picked = rows[row_idx, col_idx]

    kmax_shape = T.cast(
        T.join(0, T.as_tensor([n_rows]), T.as_tensor([k])), 'int64')
    kmax = T.reshape(picked, kmax_shape, ndim=2)

    zeros_right = T.zeros((n_rows, self.unifiedWidth - k),
                          dtype=theano.config.floatX)
    padded = T.concatenate([kmax, zeros_right], axis=1)

    # recover tensor form
    out_shape = T.cast(
        T.join(0,
               curConv_out.shape[:-2],
               T.as_tensor([curConv_out.shape[2]]),
               T.as_tensor([self.unifiedWidth])),
        'int64')
    return T.reshape(padded, out_shape, ndim=4)
def get_aggregator(self):
    """Return an ``Aggregator`` that accumulates a numerator and a denominator.

    Both quantities are summed in shared accumulators across accumulation
    steps; the readout is their ratio. An ``initialized`` flag makes the
    first accumulation ignore whatever the accumulators currently hold.
    """
    initialized = shared_like(0.)
    numerator_acc = shared_like(self.numerator)
    denominator_acc = shared_like(self.denominator)

    def accumulated(current, acc):
        # Until the flag is set, a zero tensor with the shape of the new
        # value stands in for the previously aggregated value.
        placeholder = tensor.as_tensor(current).zeros_like()
        return current + ifelse(initialized, acc, placeholder)

    initialization_updates = [
        (numerator_acc, tensor.zeros_like(numerator_acc)),
        (denominator_acc, tensor.zeros_like(denominator_acc)),
        (initialized, 0.),
    ]
    accumulation_updates = [
        (numerator_acc, accumulated(self.numerator, numerator_acc)),
        (denominator_acc, accumulated(self.denominator, denominator_acc)),
        (initialized, 1.),
    ]
    return Aggregator(aggregation_scheme=self,
                      initialization_updates=initialization_updates,
                      accumulation_updates=accumulation_updates,
                      readout_variable=(numerator_acc / denominator_acc))
def theano_scan_color(writer, draw_fn):
    """Render frames forever by mapping a Theano draw function over the screen.

    ``draw_fn(step)`` must return a function that ``theano.map`` applies to
    three aligned grids: the vane index, the pixel index and the colour
    channel index (0, 1, 2). The compiled result is written into the
    writer's buffer once per frame and ``step`` is decremented after each
    frame. A warning is logged whenever a frame is slower than TARGET_FPS.

    This function does not return.
    """
    n_vanes = Screen.screen_vane_count
    n_px = Screen.screen_max_magnitude

    with writer as writer_buf:
        frame = writer_buf.reshape((n_vanes, n_px, 3))

        # Coordinate grids, each n_vanes x n_px x 3.
        vane_grid = [[[float(vane)] * 3 for _ in range(n_px)]
                     for vane in range(n_vanes)]
        px_grid = [[[float(px)] * 3 for px in range(n_px)]
                   for _ in range(n_vanes)]
        channel_grid = [[[0.0, 1.0, 2.0] for _ in range(n_px)]
                        for _ in range(n_vanes)]

        step = T.fscalar('step')
        per_element_fn = draw_fn(step)
        frame_expr, _ = theano.map(
            per_element_fn,
            [T.as_tensor(vane_grid),
             T.as_tensor(px_grid),
             T.as_tensor(channel_grid)])
        render = theano.function([step], frame_expr,
                                 allow_input_downcast=True,
                                 on_unused_input='ignore')

        step_actual = 0
        while True:
            writer.frame_ready()
            started = time.time()
            frame[:] = render(step_actual)
            step_actual -= 1
            elapsed = time.time() - started
            fps = 1.0 / elapsed
            if fps < TARGET_FPS:
                logging.warning('Frame rate is %f, which is lower than target %d', fps, TARGET_FPS)
def maxpool_3D(input, ds, ignore_border=False):
    """Max-pool the last three dimensions of ``input`` using two 2D poolings.

    The last two dimensions are pooled first with ``(ds[1], ds[2])``. The
    third-to-last dimension is then moved to the back, pooled with
    ``(1, ds[0])``, and moved back to its original position.

    :param input: symbolic tensor with at least 3 dimensions.
    :param ds: three pooling factors for the last three dimensions, in order.
    :param ignore_border: forwarded to ``DownsampleFactorMax``.
    :raises NotImplementedError: if ``input`` has fewer than 3 dimensions.
    """
    if input.ndim < 3:
        raise NotImplementedError('max_pool_3d requires a dimension >= 3')

    def _collapse_leading(tensor):
        # Reshape to (prod(leading dims), 1, d[-2], d[-1]) so that the 2D
        # pooling op can be applied.
        n_lead = T.shape_padright(T.prod(tensor.shape[:-2]), 1)
        shape_4d = T.cast(
            T.join(0, n_lead, T.as_tensor([1, ]), tensor.shape[-2:]), 'int32')
        return T.reshape(tensor, shape_4d, ndim=4)

    n_dims = input.ndim
    lead_axes = list(range(n_dims - 3))

    # Step 1: pool the last two dimensions.
    pooled_frames = DownsampleFactorMax((ds[1], ds[2]), ignore_border)(
        _collapse_leading(input))
    frame_shape = T.join(0, input.shape[:-2], pooled_frames.shape[-2:])
    frame_pooled = T.reshape(pooled_frames, frame_shape, ndim=input.ndim)

    # Step 2: move the third-to-last dimension to the back and pool it.
    time_last = frame_pooled.dimshuffle(
        lead_axes + [n_dims - 2, n_dims - 1, n_dims - 3])
    pooled_time = DownsampleFactorMax((1, ds[0]), ignore_border)(
        _collapse_leading(time_last))

    # Step 3: restore the leading dimensions and the original axis order.
    restored_shape = T.join(0, time_last.shape[:-2], pooled_time.shape[-2:])
    restore_axes = lead_axes + [n_dims - 1, n_dims - 3, n_dims - 2]
    return T.reshape(pooled_time, restored_shape,
                     ndim=input.ndim).dimshuffle(restore_axes)
def max_pool(images, imgshp, maxpoolshp):
    """Implements a max pooling layer

    Takes as input a 2D tensor of shape batch_size x img_size and performs
    max pooling. Max pooling downsamples by taking the max value in a given
    area, here defined by maxpoolshp. Outputs a 2D tensor of shape
    batch_size x output_size.

    :param images: 2D tensor containing the images to pool. Assumed to be of
        shape batch_size x img_size
    :param imgshp: tuple containing image dimensions, either (height, width)
        or (nfeatures, height, width)
    :param maxpoolshp: tuple containing shape of area to max pool over

    :return: out1, symbolic result (2D tensor)
    :return: out2, logical shape of the output
    """
    pool_area = numpy.int64(numpy.prod(maxpoolshp))

    # With only (height, width) given, default nfeatures to 1.
    if numpy.size(imgshp) == 2:
        imgshp = (1,) + imgshp
    n_features = imgshp[0]

    # Indices and index pointers of a sparse matrix which, multiplied with
    # the input images, generates a stack of image patches.
    indices, indptr, spmat_shape, sptype, outshp = \
        convolution_indices.conv_eval(imgshp, maxpoolshp,
                                      maxpoolshp, mode='valid')
    n_out = numpy.prod(outshp)
    n_images = images.shape[0]

    extractor = theano.sparse.CSM(sptype)(numpy.ones(indices.size),
                                          indices, indptr, spmat_shape)
    patches = sparse.structured_dot(extractor, images.T).T

    # One patch per row of the last axis; take the max over each patch.
    stack_shape = tensor.stack([n_images * tensor.as_tensor(n_out),
                                tensor.as_tensor(n_features),
                                tensor.as_tensor(pool_area)])
    pooled = tensor.max(tensor.reshape(patches, stack_shape, ndim=3), axis=2)

    # Regroup per image, swap the last two axes, then flatten back to 2D.
    grid_shape = tensor.stack([n_images,
                               tensor.as_tensor(n_out),
                               tensor.as_tensor(n_features)])
    pooled_3d = tensor.reshape(pooled, grid_shape, ndim=3)
    swapped = tensor.DimShuffle(pooled_3d.broadcastable, (0, 2, 1))(pooled_3d)

    return tensor.flatten(swapped, 2), outshp
def _grad_single(self, ct, s, lnC2, GAMMI2):
    """Build two symbolic Jacobian vectors for one ``(ct, s)`` pair.

    Each returned vector is the concatenation of a derivative with respect
    to ``GAMMI2`` followed by a derivative with respect to ``lnC2``; the
    second vector is the negation of the first.

    Side effect: a list of intermediate expressions is stored in
    ``self.debug``.

    :param ct: only stored in ``self.debug``; not used in the computation.
    :param s: passed as a non-sequence to ``self._free_energy``.
    :param lnC2: variable the free-energy Jacobians are taken with respect to.
    :param GAMMI2: initial value of the ``self._upd_gamma`` recurrence.
    :return: tuple ``(Jac1, Jac2)``.
    """
    lnC = lnC2
    GAMMI = GAMMI2
    v = self.v#T.as_tensor(self.v)[:,ct:]
    # Rows of v split by the value (0 or 1) in their first column.
    v0 = T.as_tensor(v[v[:,0]==0, :])
    v1 = T.as_tensor(v[v[:,0]==1, :])
    cnp = v.shape[0]
    # Gradient of fE wrt the priors over final state
    [ofE, oxS], upd_fE_single = th.scan(fn=self._free_energy, sequences=v, non_sequences=[s,self.h,lnC,self.b])
    # NOTE(review): ofE is indexed with the row subsets v0/v1 themselves;
    # presumably these select the free energies of each group -- confirm.
    ofE0 = ofE[v0].sum()
    ofE1 = ofE[v1].sum()
    dFE0dlnC = T.jacobian(ofE0, lnC)
    dFE1dlnC = T.jacobian(ofE1, lnC)
    dFEdlnC = T.jacobian(ofE, lnC)
    # ofE_ is created here but not used again in this method.
    ofE_ = T.vector()
    ofE_.tag.test_value = ofE.tag.test_value
    # Gradient of Gamma with respect to its initial condition:
    GAMMA, upd_GAMMA = th.scan(fn=self._upd_gamma, outputs_info=[GAMMI], non_sequences=[ofE, self.lambd, self.alpha, self.beta, cnp], n_steps=4)
    dGdg = T.grad(GAMMA[-1], GAMMI)
    # Chain rule: d GAMMA / d lnC through the free energies.
    dGdfE = T.jacobian(GAMMA[-1], ofE)
    dGdlnC = dGdfE.dot(dFEdlnC)
    out1 = ofE0
    out2 = ofE1
    # The maximum is subtracted inside the exponentials below.
    maxout = T.max([out1, out2])
    exp_out1 = T.exp(GAMMA[-1]*(out1 - maxout))
    exp_out2 = T.exp(GAMMA[-1]*(out2 - maxout))
    norm_const = exp_out1 + exp_out2
    # Derivative wrt the second output (gammi):
    Jac1_gammi = (-(out1-out2)*dGdg* T.exp(GAMMA[-1]*(out1+out2 - 2*maxout))/(norm_const**2))
    Jac2_gammi = -Jac1_gammi
    # dfd1_tZ = Jac1_gammi*dCdf[1][0]+ Jac2_gammi*dCdf[1][1]
    # Derivative wrt first input (lnc)
    Jac1_lnC = (T.exp(GAMMA[-1]*(out1 + out2 - 2*maxout))/(norm_const**2)* (-dGdlnC*(out1 - out2) - GAMMA[-1]*(dFE0dlnC - dFE1dlnC)))
    Jac2_lnC = -Jac1_lnC
    # Each Jacobian is laid out as [d/dGAMMI, d/dlnC...].
    Jac1 = T.concatenate([T.stack(Jac1_gammi), Jac1_lnC])
    Jac2 = T.concatenate([T.stack(Jac2_gammi), Jac2_lnC])
    # Intermediate expressions kept on the instance for inspection.
    self.debug = [Jac1_lnC, Jac2_lnC, Jac2_gammi, Jac1_gammi, dFE0dlnC, dFE1dlnC, dGdg, out1, out2, v0, v1, v, ct]
    return Jac1, Jac2
def __init__(self, n_units, **kwargs):
    """Set up a LEAKYLPLSTM unit with ``n_units`` cells.

    Extra keyword arguments are accepted but not forwarded to the base
    class.
    """
    # Four blocks per unit: forget gate (FG), output gate 1 (OG1),
    # output gate 2 (OG2), net input (IN).
    gate_width = n_units * 4
    super(LEAKYLPLSTM, self).__init__(
        n_units=n_units,
        n_in=gate_width,
        n_out=n_units,
        n_re=gate_width,
        n_act=2,  # output, cell state
    )
    # Two separate all-ones float32 vectors of length n_units.
    self.o_output, self.o_h = (
        T.as_tensor(numpy.ones((n_units,), dtype='float32'))
        for _ in range(2)
    )
def __init__(self, n_units, **kwargs):
    """Set up an LSTMS unit with ``n_units`` cells.

    Extra keyword arguments are accepted but not forwarded to the base
    class.
    """
    # Four blocks per unit: input gate, forget gate, output gate, net input.
    gate_width = n_units * 4
    super(LSTMS, self).__init__(
        n_units=n_units,
        n_in=gate_width,
        n_out=n_units,
        n_re=gate_width,
        n_act=2,  # output, cell state
    )
    # Two separate all-ones float32 vectors of length n_units.
    self.o_output, self.o_h = (
        T.as_tensor(numpy.ones((n_units,), dtype='float32'))
        for _ in range(2)
    )
def symbolic_g(self, symbolic_X_list, t):
    '''
    Return the control matrices for the two state components.

    Both are constant all-ones tensors of shape
    [1, self.control_dimensions]; neither the state values nor t influence
    the result.

    Original note: the gx for every state x must be a matrix with
    dimensions [number_of_rollouts, x_dim, control_dim] with
    x.shape = [number_of_rollouts, x_dim]
    '''
    # The two state components are looked up but not used further.
    _x, _y = symbolic_X_list[0], symbolic_X_list[1]
    return [T.as_tensor(np.ones([1, self.control_dimensions]))
            for _ in range(2)]
def __init__(self, n_units, **kwargs):
    """Set up a PIDLSTM unit with ``n_units`` cells.

    Extra keyword arguments are accepted but not forwarded to the base
    class.
    """
    # Four blocks per unit: forget gate (FG), proportional gate (PG),
    # difference gate (DG), net input (IN).
    gate_width = n_units * 4
    super(PIDLSTM, self).__init__(
        n_units=n_units,
        n_in=gate_width,
        n_out=n_units,
        n_re=gate_width,
        n_act=2,  # output, cell state
    )
    # Two separate all-ones float32 vectors of length n_units.
    self.o_output, self.o_h = (
        T.as_tensor(numpy.ones((n_units,), dtype='float32'))
        for _ in range(2)
    )
def max_pool_3d(input, ds, ignore_border=False):
    """
    Takes as input a N-D tensor, where N >= 4, and max-pools it over time,
    height and width using non-overlapping patches of size
    (ds[0], ds[1], ds[2]).

    The input is treated as laid out ``(..., time, d, height, width)``:
    height and width are the last two dimensions and time is the dimension
    at position -4. The dimension at position -3 (``d``) is not pooled
    (presumably the channel axis -- TODO confirm against callers).

    :type input: N-D theano tensor of input images.
    :param input: input images, N >= 4.
    :type ds: tuple of length 3
    :param ds: factor by which to downscale (time, height, width).
        (2,2,2) will halve the video in each pooled dimension.
    :param ignore_border: boolean value. Example when True, (5,5,5) pooled
        extents with ds=(2,2,2) will generate (2,2,2). (3,3,3) otherwise.
    :raises NotImplementedError: if ``input`` has fewer than 4 dimensions.
    """
    # The time axis is addressed as dimension -4 below, so a 3-D input would
    # produce an invalid dimshuffle pattern; reject it up front.
    if input.ndim < 4:
        raise NotImplementedError('max_pool_3d requires a dimension >= 4')

    vid_dim = input.ndim

    # --- Max-pool each frame (last two dimensions) ---
    frame_shape = input.shape[-2:]
    # Collapse all leading dimensions into one batch dimension.
    batch_size = T.prod(input.shape[:-2])
    batch_size = T.shape_padright(batch_size, 1)
    # store as 4D tensor with shape: (batch_size,1,height,width)
    new_shape = T.cast(
        T.join(0, batch_size, T.as_tensor([1, ]), frame_shape), 'int32')
    input_4D = T.reshape(input, new_shape, ndim=4)

    # downsample mini-batch of videos in rows and cols
    op = DownsampleFactorMax((ds[1], ds[2]), ignore_border)
    output = op(input_4D)
    # restore to original shape
    outshape = T.join(0, input.shape[:-2], output.shape[-2:])
    out = T.reshape(output, outshape, ndim=input.ndim)

    # --- Max-pool time ---
    # Move the time axis (position -4) to the back:
    # (..., time, d, rows, cols) -> (..., d, rows, cols, time)
    shufl = (list(range(vid_dim - 4)) +
             list(range(vid_dim - 3, vid_dim)) +
             [vid_dim - 4])
    input_time = out.dimshuffle(shufl)
    vid_shape = input_time.shape[-2:]
    # Collapse all leading dimensions into one batch dimension.
    batch_size = T.prod(input_time.shape[:-2])
    batch_size = T.shape_padright(batch_size, 1)
    # store as 4D tensor with shape: (batch_size,1,width,time)
    new_shape = T.cast(
        T.join(0, batch_size, T.as_tensor([1, ]), vid_shape), 'int32')
    input_4D_time = T.reshape(input_time, new_shape, ndim=4)

    # downsample mini-batch of videos in time
    op = DownsampleFactorMax((1, ds[0]), ignore_border)
    outtime = op(input_4D_time)

    # restore to shape (..., d, rows, cols, time), then move time back to
    # position -4
    outshape = T.join(0, input_time.shape[:-2], outtime.shape[-2:])
    shufl = (list(range(vid_dim - 4)) +
             [vid_dim - 1] +
             list(range(vid_dim - 4, vid_dim - 1)))
    return T.reshape(outtime, outshape, ndim=input.ndim).dimshuffle(shufl)
def __init__(self, conv_out, k=1):
    """
    Build a k-max pooling layer over the columns of ``conv_out``.

    For every column (all rows at a fixed position of the last axis) the
    ``k`` largest values are kept in their original order. The result is
    passed through ``tanh`` and stored in ``self.output``.

    :type conv_out: 4D theano tensor
    :param conv_out: symbolic tensor to pool

    :type k: int
    :param k: number of values kept per column

    The output has shape
    (conv_out.shape[0], conv_out.shape[1], conv_out.shape[3], k).
    No bias is added in this layer.
    """
    # images2neibs produces a 2D matrix; here each row is one full column
    # of a feature map.
    columns = TSN.images2neibs(ten4=conv_out,
                               neib_shape=(conv_out.shape[2], 1),
                               mode='ignore_borders')

    # The last k argsort entries are the positions of the k largest values;
    # sorting the positions keeps the values in their original order.
    top_k_pos = T.sort(T.argsort(columns, axis=1)[:, -k:], axis=1)
    row_idx = T.repeat(T.arange(columns.shape[0]), k)
    col_idx = top_k_pos.flatten()
    picked = columns[row_idx, col_idx]

    # Back to 4D: leading dims, then the column index, then the k values.
    pooled_shape = T.cast(
        T.join(0,
               conv_out.shape[:-2],
               T.as_tensor([conv_out.shape[3]]),
               T.as_tensor([k])),
        'int32')
    pooled_out = T.reshape(picked, pooled_shape, ndim=4)

    self.output = T.tanh(pooled_out)
def make_node(self, frames, n, axis):
    """ compute an n-point fft of frames along given axis

    :param frames: converted to a 2D tensor
    :param n: converted to a 0-d tensor
    :param axis: converted to a 0-d tensor
    :raises TypeError: if ``self.half`` is set and ``frames`` is complex
    :return: Apply node with outputs (complex matrix, generic buffer)
    """
    node_inputs = [tensor.as_tensor(frames, ndim=2),
                   tensor.as_tensor(n, ndim=0),
                   tensor.as_tensor(axis, ndim=0)]
    if self.half and node_inputs[0].type.dtype.startswith('complex'):
        raise TypeError('Argument to HalfFFT must not be complex', frames)

    # The second output is present for future work: when FFTW is called
    # directly and the 'plan' it creates is re-used, this output would store
    # a CObject encapsulating the plan.
    node_outputs = [tensor.zmatrix(), generic()]
    return Apply(self, node_inputs, node_outputs)
def updates(self, gradients):
    """
    Return symbolic updates to apply given a set of gradients
    on the parameters being optimized.

    Parameters
    ----------
    gradients : list of tensor_likes
        List of symbolic gradients for the parameters contained
        in self.params, in the same order as in self.params.

    Returns
    -------
    updates : dict
        A dictionary with the shared variables in self.params as keys
        and a symbolic expression of how they are to be updated each
        SGD step as values.

    Notes
    -----
    `cost_updates` is a convenient helper function that takes all
    necessary gradients with respect to a given symbolic cost.
    """
    ups = {}

    # Add the learning rate/iteration updates
    l_ups, learn_rates = self.learning_rate_updates()
    safe_update(ups, l_ups)

    # Get the updates from sgd_updates, a PyLearn library function.
    p_up = dict(sgd_updates(self.params, gradients, learn_rates))

    # Add the things in p_up to ups
    safe_update(ups, p_up)

    # Clip the values if needed.
    # We do not want the clipping values to force an upcast
    # of the update: updates should have the same type as params
    # `.items()` is used instead of the Python 2-only `.iteritems()` so the
    # loop works on both Python 2 and Python 3.
    for param, (p_min, p_max) in self.clipping_values.items():
        p_min = tensor.as_tensor(p_min)
        p_max = tensor.as_tensor(p_max)
        dtype = param.dtype
        if p_min.dtype != dtype:
            p_min = tensor.cast(p_min, dtype)
        if p_max.dtype != dtype:
            p_max = tensor.cast(p_max, dtype)
        ups[param] = tensor.clip(ups[param], p_min, p_max)

    # Return the updates dictionary.
    return ups
def grad(self, inputs, dCdf):
    """Return the gradient with respect to the single packed input.

    ``self._grad_single`` is scanned over ``self.ct`` and ``self.s``. Its
    non-sequence arguments are ``inputs[0][1:]`` and ``inputs[0][0]``, in
    that order. The two Jacobian blocks it returns are concatenated along
    axis 0 and contracted with ``dCdf[0]``.

    :return: 1-tuple containing the gradient expression.
    """
    packed = inputs[0]
    (jac1, jac2), _ = th.scan(
        fn=self._grad_single,
        sequences=[T.as_tensor(self.ct), T.as_tensor(self.s)],
        non_sequences=[packed[1:], packed[0]])
    stacked = T.concatenate([jac1, jac2], axis=0)
    return (stacked.T.dot(dCdf[0]),)
def test_2():
    """Compare subtensor_batched_index on a 2D input with advanced indexing."""
    n_time, n_dim = 11, 5
    numpy.random.seed(1234)
    x_np = numpy.random.randn(n_time, n_dim).astype(f32)
    idx_np = numpy.random.randint(0, n_dim, (n_time,))
    assert idx_np.shape == (n_time,)

    x = T.as_tensor(x_np)
    idx = T.as_tensor(idx_np)

    actual = subtensor_batched_index(x, idx)
    # Reference: pick x[t, idx[t]] for every time step t.
    steps = T.arange(x.shape[0])
    expected = x[steps, idx[steps]]

    assert_almost_equal(actual.eval(), expected.eval())
def max_pool_3d(input, ds, ignore_border=False):
    """Max-pool the last three dimensions of a 5D tensor [n, c, x, y, z].

    The [y, z] frames are pooled first with ``(ds[1], ds[2])``; the x axis is
    then moved to the back, pooled with ``(1, ds[0])`` and moved back.

    :raises NotImplementedError: if ``input`` is not 5-dimensional.
    """
    # Inputs other than [n, c, x, y, z] are not accepted.
    if input.ndim != 5:
        raise NotImplementedError(
            'max_pool_3d requires a input [n, c, x, y, z]')

    def _to_4d(tensor):
        # Batch size: the product of the sizes of all dimensions other than
        # the two frame dimensions. See
        # http://deeplearning.net/software/theano/library/tensor/basic.html#theano.tensor.shape_padright
        n_batch = T.shape_padright(T.prod(tensor.shape[:-2]), 1)
        shape_4d = T.cast(
            T.join(0, n_batch, T.as_tensor([1, ]), tensor.shape[-2:]),
            'int32')
        return T.reshape(tensor, shape_4d, ndim=4)

    # Number of input dimensions.
    n_dims = input.ndim
    head = list(range(n_dims - 3))

    # Pool over the [y, z] frames.
    pooled_yz = DownsampleFactorMax((ds[1], ds[2]), ignore_border)(
        _to_4d(input))
    yz_shape = T.join(0, input.shape[:-2], pooled_yz.shape[-2:])
    after_yz = T.reshape(pooled_yz, yz_shape, ndim=input.ndim)

    # Move x to the back and pool over it.
    x_last = after_yz.dimshuffle(
        head + [n_dims - 2] + [n_dims - 1] + [n_dims - 3])
    pooled_x = DownsampleFactorMax((1, ds[0]), ignore_border)(_to_4d(x_last))

    # Restore the leading dimensions and the original axis order.
    final_shape = T.join(0, x_last.shape[:-2], pooled_x.shape[-2:])
    back = head + [n_dims - 1] + [n_dims - 3] + [n_dims - 2]
    return T.reshape(pooled_x, final_shape, ndim=input.ndim).dimshuffle(back)
def test_chunk_unchunk_grad():
    """Gradient of sum((unchunk(chunk(x)) - x) ** 2) w.r.t. x should be zero."""
    dims = (101, 3, 5)  # (n_time, n_batch, n_dim)
    chunking = dict(chunk_size=11, chunk_step=7)

    numpy.random.seed(1234)
    x_np = numpy.random.randn(*dims).astype(f32)
    mask_np = numpy.ones(dims[:2], dtype="int8")
    x = T.as_tensor(x_np)
    mask = T.as_tensor(mask_np)

    chunked, chunked_mask = chunk(x, index=mask, **chunking)
    restored, _mask2, _factors = unchunk(
        chunked, index=chunked_mask,
        n_time=x.shape[0], n_batch=x.shape[1], **chunking)

    squared_error = T.sum((restored - x) ** 2)
    grad_value = T.grad(squared_error, wrt=x).eval()
    assert_almost_equal(grad_value, 0)
def max_pool_2d(input, ds, ignore_border=False):
    """
    Takes as input a N-D tensor, where N >= 2. It downscales the input image
    by the specified factor, by keeping only the maximum value of
    non-overlapping patches of size (ds[0],ds[1])

    :type input: N-D theano tensor of input images.
    :param input: input images. Max pooling will be done over the 2 last
        dimensions.
    :type ds: tuple of length 2
    :param ds: factor by which to downscale. (2,2) will halve the image in
        each dimension.
    :param ignore_border: boolean value. When True, (5,5) input with
        ds=(2,2) will generate a (2,2) output. (3,3) otherwise.
    :raises NotImplementedError: if the input has fewer than 2 dimensions.
    """
    if input.ndim < 2:
        raise NotImplementedError("max_pool_2d requires a dimension >= 2")

    leading_dims = input.shape[:-2]

    # Collapse all leading dimensions into one batch axis and view the input
    # as a 4D tensor of shape (batch_size, 1, height, width).
    n_images = tensor.shape_padright(tensor.prod(leading_dims), 1)
    shape_4d = tensor.cast(
        tensor.join(0, n_images, tensor.as_tensor([1]), input.shape[-2:]),
        "int64")
    images_4d = tensor.reshape(input, shape_4d, ndim=4)

    # downsample mini-batch of images
    pooled = DownsampleFactorMax(ds, ignore_border)(images_4d)

    # restore the original leading dimensions
    final_shape = tensor.join(0, leading_dims, pooled.shape[-2:])
    return tensor.reshape(pooled, final_shape, ndim=input.ndim)
def get_aggregator(self):
    """Return an ``Aggregator`` that sums ``self.variable`` over accumulations.

    An ``initialized`` flag makes the first accumulation ignore whatever the
    accumulator currently holds; the readout is the accumulated total.
    """
    initialized = shared_like(0.)
    total_acc = shared_like(self.variable)

    # Until the flag is set, a zero tensor with the shape of the new value
    # stands in for the previously aggregated value.
    placeholder = tensor.as_tensor(self.variable).zeros_like()
    new_total = self.variable + ifelse(initialized, total_acc, placeholder)

    reset = [
        (total_acc, tensor.zeros_like(total_acc)),
        (initialized, tensor.zeros_like(initialized)),
    ]
    accumulate = [
        (total_acc, new_total),
        (initialized, tensor.ones_like(initialized)),
    ]
    return Aggregator(aggregation_scheme=self,
                      initialization_updates=reset,
                      accumulation_updates=accumulate,
                      readout_variable=(total_acc))
def conv_forward_prop_test(sentences):
    """Run ``sentences`` through the three convolution layers.

    The filters of each layer are multiplied by a fixed factor (0.8 for the
    first layer, 0.5 for the other two) when ``self.dropout`` is set, and
    used unchanged otherwise. ``self`` is taken from the enclosing scope.
    """
    def scaled_filters(layer, factor):
        # Scale the filters only when self.dropout is set.
        filters = self.filters[layer]
        return T.switch(self.dropout, filters * factor, filters)

    sentence_len = sentences.shape[-1]

    k = calculate_k(sentence_len, 1)
    hidden = create_convolution_layer(
        sentences, scaled_filters(0, 0.8),
        self.folding_layers[0], k, self.biases[0])

    k = calculate_k(sentence_len, 2)
    hidden = create_convolution_layer(
        hidden, scaled_filters(1, 0.5),
        self.folding_layers[1], k, self.biases[1])

    # The last layer always pools down to the fixed k_top.
    k = T.as_tensor(self.k_top)
    return create_convolution_layer(
        hidden, scaled_filters(2, 0.5),
        self.folding_layers[2], k, self.biases[2])
def test_1():
    """Compare subtensor_batched_index on a 3D input with advanced indexing."""
    n_time, n_batch, n_dim = 11, 3, 5
    numpy.random.seed(1234)
    x_np = numpy.random.randn(n_time, n_batch, n_dim).astype(f32)
    idx_np = numpy.random.randint(0, n_dim, (n_time, n_batch))
    assert idx_np.shape == (n_time, n_batch)

    x = T.as_tensor(x_np)
    idx = T.as_tensor(idx_np)

    actual = subtensor_batched_index(x, idx)

    # Reference: merge time and batch into one axis, pick one entry per
    # row, then restore the (time, batch) layout.
    rows = T.arange(x.shape[0] * x.shape[1])
    x_flat = x.reshape((rows.shape[0], x.shape[2]))
    expected = x_flat[rows, idx.flatten()[rows]].reshape(idx.shape)

    assert_almost_equal(actual.eval(), expected.eval())
def max_pool_2d(input, ds, ignore_border=False, st=None,
                padding=(0, 0), mode='max'):
    """
    Takes as input a N-D tensor, where N >= 2. It downscales the input image
    by the specified factor, by keeping only the maximum value of
    non-overlapping patches of size (ds[0],ds[1])

    :type input: N-D theano tensor of input images.
    :param input: input images. Max pooling will be done over the 2 last
        dimensions.
    :type ds: tuple of length 2
    :param ds: factor by which to downscale (vertical ds, horizontal ds).
        (2,2) will halve the image in each dimension.
    :type ignore_border: bool
    :param ignore_border: When True, (5,5) input with ds=(2,2)
        will generate a (2,2) output. (3,3) otherwise.
    :type st: tuple of length 2
    :param st: stride size, which is the number of shifts
        over rows/cols to get the next pool region.
        if st is None, it is considered equal to ds
        (no overlap on pooling regions)
    :type padding: tuple of two ints
    :param padding: (pad_h, pad_w), pad zeros to extend beyond four borders
        of the images, pad_h is the size of the top and bottom margins,
        and pad_w is the size of the left and right margins.
    :type mode: string
    :param mode: 'max', 'average_inc_pad' or 'average_exc_pad'.
        Operation executed on each window. `max` always excludes the padding
        in the computation. `average` gives you the choice to include or
        exclude it.
    :raises NotImplementedError: if the input has fewer than 2 dimensions.
    """
    if input.ndim < 2:
        raise NotImplementedError('max_pool_2d requires a dimension >= 2')

    pool = DownsampleFactorMax(ds, ignore_border, st=st,
                               padding=padding, mode=mode)

    # 4D inputs are already in the layout the op expects.
    if input.ndim == 4:
        return pool(input)

    leading_dims = input.shape[:-2]

    # Collapse all leading dimensions into one batch axis and view the input
    # as a 4D tensor of shape (batch_size, 1, height, width).
    n_images = tensor.shape_padright(tensor.prod(leading_dims), 1)
    shape_4d = tensor.cast(
        tensor.join(0, n_images, tensor.as_tensor([1]), input.shape[-2:]),
        'int64')
    pooled = pool(tensor.reshape(input, shape_4d, ndim=4))

    # restore the original leading dimensions
    final_shape = tensor.join(0, leading_dims, pooled.shape[-2:])
    return tensor.reshape(pooled, final_shape, ndim=input.ndim)
def __init__(self, factor=numpy.sqrt(2), decay=1.0, min_factor=None, padding=False, **kwargs):
    """Convolution followed by ``fmp`` pooling with an epoch-dependent factor.

    :param factor: base pooling factor passed to ``fmp``.
    :param decay: the factor is multiplied by ``decay ** self.network.epoch``.
    :param min_factor: lower bound for the decayed factor; defaults to
        ``factor``.
    :param padding: when false, padding is decided symbolically from the
        source sizes; otherwise the given value is used as the condition.
    :param kwargs: forwarded to the base class constructor.
    """
    super(ConvFMPLayer, self).__init__(**kwargs)
    if min_factor is None:
        min_factor = factor
    # Decay the factor with the epoch, but never below min_factor.
    factor = T.maximum(factor * (decay ** self.network.epoch), numpy.float32(min_factor))
    sizes_raw = self.source.output_sizes

    # handle size problems
    if not padding:
        # Pad whenever some source size divided by the factor is <= 0; the
        # Print op routes the flag through maybe_print_pad_warning.
        padding = T.min(self.source.output_sizes / factor) <= 0
        padding = theano.printing.Print(global_fn=maybe_print_pad_warning)(padding)

    # Sizes raised to at least factor + filter extent - 1 in each direction.
    fixed_sizes = T.maximum(sizes_raw, T.cast(T.as_tensor(
        [factor + self.filter_height - 1, factor + self.filter_width - 1]), 'float32'))
    sizes = ifelse(padding, fixed_sizes, sizes_raw)
    X_size = T.cast(T.max(sizes, axis=0), "int32")

    def pad_fn(x_t, s):
        # Copy the leading s[0] x s[1] region of x_t into a zero tensor of
        # the maximal size.
        x = T.alloc(numpy.cast["float32"](0), X_size[0], X_size[1], self.X.shape[3])
        x = T.set_subtensor(x[:s[0], :s[1]], x_t[:s[0], :s[1]])
        return x

    # Scan over axis 2 of self.X (moved to the front) together with the raw
    # sizes, then move that axis back.
    fixed_X, _ = theano.scan(pad_fn, [self.X.dimshuffle(2, 0, 1, 3), T.cast(sizes_raw, "int32")])
    fixed_X = fixed_X.dimshuffle(1, 2, 0, 3)
    self.X = ifelse(padding, T.unbroadcast(fixed_X, 3), self.X)

    conv_out = CuDNNConvHWBCOpValidInstance(self.X, self.W, self.b)
    conv_out_sizes = self.conv_output_size_from_input_size(sizes)
    self.output, self.output_sizes = fmp(conv_out, conv_out_sizes, T.cast(factor,'float32'))
def max_pool_switch_2d(input, ds, ignore_border=None, st=None,
                       padding=(0, 0), index_type='flattened',
                       index_scope='local'):
    """Apply ``MaxPoolSwitch`` over the last two dimensions of ``input``.

    4D inputs are passed to the op directly. Any other input with at least
    2 dimensions is reshaped to (batch_size, 1, height, width), pooled, and
    reshaped back to its original leading dimensions.

    :param ignore_border: ``None`` is treated as ``False``.
    :raises NotImplementedError: if the input has fewer than 2 dimensions.
    """
    if input.ndim < 2:
        raise NotImplementedError('max_pool_switched_2d requires a dimension >= 2')
    if ignore_border is None:
        ignore_border = False

    pool = MaxPoolSwitch(ds, ignore_border, st=st, padding=padding,
                         index_type=index_type, index_scope=index_scope)

    if input.ndim == 4:
        return pool(input)

    leading_dims = input.shape[:-2]

    # Collapse all leading dimensions into a single batch axis.
    n_images = T.shape_padright(T.prod(leading_dims), 1)
    shape_4d = T.cast(
        T.join(0, n_images, T.as_tensor([1]), input.shape[-2:]), 'int64')
    pooled = pool(T.reshape(input, shape_4d, ndim=4))

    # restore the original leading dimensions
    final_shape = T.join(0, leading_dims, pooled.shape[-2:])
    return T.reshape(pooled, final_shape, ndim=input.ndim)
def grad(self, inputs, g_outputs):
    """Return the gradient with respect to the single input ``rho``.

    Computes ``sum(inv(Id - rho * Wd).T * (-Wd) * gz)`` (products are
    element-wise) as a 1-d tensor. Presumably this is the derivative of
    log det(Id - rho * Wd) -- TODO confirm against ``perform``.
    """
    (rho,) = inputs
    (gz,) = g_outputs
    system = self.Id - tt.mul(rho, self.Wd)
    inverse_t = tt.nlinalg.matrix_inverse(system).T
    weighted = tt.mul(inverse_t, -self.Wd)
    total = tt.sum(tt.mul(weighted, gz))
    return [tt.as_tensor(total, ndim=1)]
def run(gates, num_registers, max_int, num_timesteps, num_layers,
        reg_lambda, params, clip_gradients=None):
    """Build the symbolic graph of the machine unrolled over all timesteps.

    :return: tuple ``(debug, initial_mem, desired_mem, cost_mask, mem,
        final_cost, entropy_weight)`` where ``debug`` maps names to
        intermediate expressions, ``mem`` is the memory after the last
        timestep and ``final_cost`` is regularization plus loss.
    """
    params = make_broadcastable(params, clip_gradients=clip_gradients)

    # Symbolic inputs of the machine and its desired output.
    initial_mem = dtensor3("InMem")
    desired_mem = imatrix("OutMem")
    cost_mask = bmatrix("CostMask")
    entropy_weight = dscalar("EntropyWeight")

    # All registers start at zero: build the one-hot shape directly by
    # setting index 0 of the last axis to 1.
    blank = zeros((initial_mem.shape[0], num_registers, max_int),
                  dtype='float64')
    registers = set_subtensor(blank[:, :, 0], 1.0)

    # State carried between timesteps: registers, memory, cost, cumulative
    # probability complete (starts at zero) and probability incomplete
    # (starts at one).
    zero = as_tensor(0)
    one = as_tensor(1)
    state = (registers, initial_mem, zero, zero, one)

    debug = {}
    for step_index in range(num_timesteps):
        step_debug, state = step_cost(
            gates, max_int, desired_mem, cost_mask, num_timesteps,
            num_registers, num_layers, entropy_weight, step_index + 1,
            *state, params)
        for key, value in step_debug.items():
            debug["%d:%s" % (step_index, key)] = value

    # Add in regularization, to avoid overfitting simple examples.
    reg_cost = reg_lambda * sum((p * p).sum() for p in params)
    debug['cost-regularization'] = reg_cost

    # Final cost: regularization plus loss.
    final_cost = reg_cost + state[2].sum()
    debug['cost-final'] = final_cost

    return (debug, initial_mem, desired_mem, cost_mask, state[1],
            final_cost, entropy_weight)
def folding(self, curConv_out):
    """Sum each pair of adjacent rows of a 4D tensor.

    The tensor is flattened to a matrix with one row per entry of its first
    three axes; rows 0, 2, 4, ... are added to rows 1, 3, 5, ... and the
    result is reshaped back to 4D with axis 2 halved.

    :param curConv_out: 4D symbolic tensor.
    :return: 4D tensor with ``shape[2]`` divided by 2.
    """
    n_cols = curConv_out.shape[3]

    flat_shape = T.cast(
        T.join(0,
               T.as_tensor([T.prod(curConv_out.shape[:-1])]),
               T.as_tensor([n_cols])),
        'int64')
    flat = T.reshape(curConv_out, flat_shape, ndim=2)

    n_flat_rows = flat_shape[0]
    first_of_pair = flat[0:n_flat_rows:2]
    second_of_pair = flat[1:n_flat_rows:2]
    summed = first_of_pair + second_of_pair

    folded_shape = T.cast(
        T.join(0,
               curConv_out.shape[:-2],
               T.as_tensor([curConv_out.shape[2]/2]),
               T.as_tensor([n_cols])),
        'int64')
    return T.reshape(summed, folded_shape, ndim=4)
def batched_diag(C):
    """Convert between batched diagonals and batched matrices.

    A 2-dimensional input is passed to ``BatchedDiag`` (diagonals ->
    matrices); a 3-dimensional input has the diagonal of its last two axes
    extracted (matrices -> diagonals).

    Raises
    ------
    ValueError
        If the input is neither 2- nor 3-dimensional.
    """
    C = tt.as_tensor(C)
    if C.ndim == 2:
        # diag -> matrices
        return BatchedDiag()(C)
    if C.ndim == 3:
        # matrices -> diag
        idx = tt.arange(C.shape[-1])
        return C[..., idx, idx]
    raise ValueError("Input should be 2 or 3 dimensional")
def geom_convolution(t, weights, axis):
    """Return the weighted geometric combination of shifted copies of ``t``.

    The result is the element-wise product over ``i`` of
    ``shift_right(t, dist=i, axis=axis, pad=1.0) ** weights[i]``, i.e. a
    linear convolution of ``log(t)`` by ``weights`` mapped back through
    ``exp``.
    """
    t = T.as_tensor(t)
    res = T.ones_like(t)
    for shift, exponent in enumerate(weights):
        # Padding with 1.0 makes the padded region neutral in the product.
        shifted = shift_right(t, dist=shift, axis=axis, pad=1.0)
        res = res * shifted ** exponent
    return res
def compile(self, name, args, *optargs):
    """Compile (once per ``(name, *optargs)``) and call ``self._<name>``.

    On the first call for a given key the current values are saved, the
    tensors are loaded, ``self._<name>(*args, *optargs)`` is turned into a
    ``theano.function`` and cached in ``self.compiled``, and the saved
    values are restored. The cached function is then called with ``args``
    followed by the attributes named in ``self.inputs``.

    ``args`` may be any sequence (list or tuple).
    """
    key = (name,) + optargs
    if key not in self.compiled:
        old = self.get_values()
        self.load_tensors()
        builder = getattr(self, '_' + name)
        names = builder.__code__.co_varnames
        inputs = [tt.as_tensor(v).type(k) for k, v in zip(names, args)]
        inputs += [getattr(self, i) for i in self.inputs]
        outputs = builder(*args, *optargs)
        self.compiled[key] = theano.function(inputs, outputs,
                                             on_unused_input='ignore')
        self.set_values(**old)
    inputs = [getattr(self, i) for i in self.inputs]
    # Unpack separately: `args + inputs` fails when `args` is a tuple,
    # because `inputs` is a list.
    return self.compiled[key](*args, *inputs)
def kmaxPooling(self, fold_out, k):
    """Keep the ``k`` largest values along the last axis, in original order.

    Each row of the last axis is extracted with ``images2neibs``; the
    positions of its ``k`` largest entries are sorted so the selected values
    keep their original relative order. The result is reshaped to 4D with
    ``k`` entries on the last axis. The extracted rows are also stored on
    ``self.neighbors``.
    """
    rows = TSN.images2neibs(ten4=fold_out,
                            neib_shape=(1, fold_out.shape[3]),
                            mode='ignore_borders')
    self.neighbors = rows

    # Column indices of the k largest entries per row, restored to
    # ascending (i.e. original) position order.
    top_k_cols = T.sort(T.argsort(rows, axis=1)[:, -k:], axis=1)

    row_idx = T.repeat(T.arange(rows.shape[0]), k)
    col_idx = top_k_cols.flatten()
    picked = rows[row_idx, col_idx]

    pooled_shape = T.cast(
        T.join(0,
               fold_out.shape[:-2],
               T.as_tensor([fold_out.shape[2]]),
               T.as_tensor([k])),
        'int64')
    return T.reshape(picked, pooled_shape, ndim=4)
def test_equal_computations():
    """Check ``equal_computations`` on variables, constants and raw values."""
    a, b = tensor.iscalars(2)

    # Mismatched list lengths are rejected.
    with pytest.raises(ValueError):
        equal_computations([a], [a, b])

    # (expected result, left outputs, right outputs)
    cases = [
        (True, [a], [a]),
        (True, [tensor.as_tensor(1)], [tensor.as_tensor(1)]),
        (False, [b], [a]),
        (False, [tensor.as_tensor(1)], [tensor.as_tensor(2)]),
        (True, [2], [2]),
        (True, [np.r_[2, 1]], [np.r_[2, 1]]),
        (True, [np.r_[2, 1]], [tensor.as_tensor(np.r_[2, 1])]),
        (True, [tensor.as_tensor(np.r_[2, 1])], [np.r_[2, 1]]),
        (False, [2], [a]),
        (False, [np.r_[2, 1]], [a]),
        (False, [a], [2]),
        (False, [a], [np.r_[2, 1]]),
    ]
    for expected, left, right in cases:
        assert bool(equal_computations(left, right)) is expected

    none_const = tensor.type_other.NoneConst
    assert equal_computations([none_const], [none_const])

    # Two separately built but identical multi-output graphs compare equal.
    m = tensor.matrix()
    first = tensor.max_and_argmax(m)
    second = tensor.max_and_argmax(m)
    assert equal_computations(first, second)
def test_chunk():
    """Compare ``chunk`` against ``naive_chunk`` on random data."""
    n_time, n_batch, n_dim = 101, 1, 5
    chunk_size, chunk_step = 11, 7

    numpy.random.seed(1234)
    data = numpy.random.randn(n_time, n_batch, n_dim).astype(f32)
    expected_out, expected_index = naive_chunk(data, chunk_size, chunk_step)
    mask = numpy.ones((n_time, n_batch), dtype="int8")

    out, oindex = chunk(T.as_tensor(data), index=T.as_tensor(mask),
                        chunk_size=chunk_size, chunk_step=chunk_step)
    actual_out = out.eval()
    actual_index = oindex.eval()

    assert expected_out.shape == actual_out.shape
    assert expected_index.shape == actual_index.shape
    assert_almost_equal(expected_index, actual_index)
    assert_almost_equal(expected_out, actual_out)
def max_pool_2d(input, ds, ignore_border=False, st=None, padding=(0, 0)):
    """
    Max-pool an N-D tensor (N >= 2) over its last two dimensions.

    The input is downscaled by keeping only the maximum value of patches of
    size (ds[0], ds[1]).

    :type input: N-D theano tensor of input images.
    :param input: input images. Max pooling will be done over the 2 last
        dimensions.
    :type ds: tuple of length 2
    :param ds: factor by which to downscale (vertical ds, horizontal ds).
        (2,2) will halve the image in each dimension.
    :type ignore_border: bool
    :param ignore_border: When True, (5,5) input with ds=(2,2) will generate
        a (2,2) output. (3,3) otherwise.
    :type st: tuple of length 2
    :param st: stride size, which is the number of shifts over rows/cols to
        get the next pool region. If st is None, it is considered equal to
        ds (no overlap on pooling regions).
    :type padding: tuple of two ints
    :param padding: (pad_h, pad_w), pad zeros to extend beyond four borders
        of the images; pad_h is the size of the top and bottom margins, and
        pad_w is the size of the left and right margins.
    """
    if input.ndim < 2:
        raise NotImplementedError('max_pool_2d requires a dimension >= 2')

    pool = DownsampleFactorMax(ds, ignore_border, st=st, padding=padding)
    if input.ndim == 4:
        # Already in the layout the op works on.
        return pool(input)

    # Collapse all leading dimensions into a single batch axis and insert a
    # singleton axis: (batch_size, 1, height, width).
    leading = tensor.shape_padright(tensor.prod(input.shape[:-2]), 1)
    shape_4d = tensor.cast(
        tensor.join(0, leading, tensor.as_tensor([1]), input.shape[-2:]),
        'int64')
    pooled = pool(tensor.reshape(input, shape_4d, ndim=4))

    # Restore the original leading dimensions around the pooled image size.
    restored_shape = tensor.join(0, input.shape[:-2], pooled.shape[-2:])
    return tensor.reshape(pooled, restored_shape, ndim=input.ndim)
def split_unk_outputs(self, outputs, outputs_mask):
    """Separate character-level (symbol < 0) positions from word outputs.

    Returns ``(unked_outputs, charlevel_indices)``: the outputs with every
    masked-in negative symbol replaced by ``self.index['<UNK>']``, and the
    nonzero indices of the transposed character-level mask.
    """
    # Positions that are both unmasked and carry a negative symbol.
    char_mask = outputs_mask * T.lt(outputs, 0)

    # Guarantee at least one selected position: when the mask is entirely
    # zero, `filler` is 1 and gets added at [0, 0].
    filler = 1 - T.sum(char_mask).clip(0, 1)
    nonempty_mask = T.inc_subtensor(char_mask[0,0], filler)
    charlevel_indices = T.nonzero(nonempty_mask.T)

    # Word-level symbols pass through; character-level ones become <UNK>.
    unk_symbol = T.as_tensor(self.index['<UNK>'])
    unked_outputs = (1 - char_mask) * outputs + char_mask * unk_symbol
    return unked_outputs, charlevel_indices
def kmaxPooling(self, conv_out, k):
    """Keep the ``k`` largest values along the last axis, in original order.

    Rows and their argsort come from ``self.preparePooling``. Intermediate
    expressions are stored on ``self`` (``neigborsSorted``, ``ii``, ``jj``,
    ``pooled``). The result is 4D with ``k`` entries on the last axis.
    """
    rows, rows_argsorted = self.preparePooling(conv_out)

    top_k_cols = rows_argsorted[:, -k:]
    self.neigborsSorted = top_k_cols
    # Sorting the selected positions restores their original order.
    top_k_cols_ordered = T.sort(top_k_cols, axis=1)

    self.ii = T.repeat(T.arange(rows.shape[0]), k)
    self.jj = top_k_cols_ordered.flatten()
    self.pooled = rows[self.ii, self.jj]

    # Reshape the flat selection back to 4D.
    pooled_shape = T.cast(
        T.join(0,
               conv_out.shape[:-2],
               T.as_tensor([conv_out.shape[2]]),
               T.as_tensor([k])),
        'int64')
    return T.reshape(self.pooled, pooled_shape, ndim=4)
def model(sim_data, prim_data):
    """Build entropy-like terms inside a fresh PyMC model context.

    NOTE(review): as visible here the function returns nothing and uses
    neither ``sim_data`` nor ``prim_data``; ``prim_data_lst``, ``N_PRIM``,
    ``N_PAIRS`` and ``N_TOP`` must come from an enclosing scope -- confirm.
    """
    mod = pm.Model()
    with mod:
        # NOTE(review): the keyword is spelled `SHAPE` (upper case), and the
        # random variable is replaced by a constant on the next statement --
        # verify that both are intended.
        probs = pm.Dirichlet("probs", np.ones(N_PRIM), SHAPE=(N_PRIM, 1))
        probs = tt.as_tensor(
            np.ones((N_PAIRS, N_TOP, N_PRIM)) / float(N_PRIM))

        log_terms = tt.log(tt.pow(probs, prim_data_lst))
        log_sum = tt.sum(log_terms, axis=2)
        weight = tt.exp(log_sum)
        ents = tt.sum(log_sum * weight, axis=1)
def infer_shape(self, node, shapes):
    """Return six output shapes, each the first input shape plus ``self.N``."""
    n_outputs = 6
    return [shapes[0] + (tt.as_tensor(self.N),) for _ in range(n_outputs)]
def test_chunk_unchunk_grad():
    """The gradient of ``sum((unchunk(chunk(x)) - x)**2)`` w.r.t. x is ~0."""
    n_time, n_batch, n_dim = 101, 3, 5
    chunk_size, chunk_step = 11, 7

    numpy.random.seed(1234)
    data = numpy.random.randn(n_time, n_batch, n_dim).astype(f32)
    mask = numpy.ones((n_time, n_batch), dtype="int8")
    x = T.as_tensor(data)
    index = T.as_tensor(mask)

    out, oindex = chunk(x, index=index, chunk_size=chunk_size,
                        chunk_step=chunk_step)
    x2, index2, factors = unchunk(out, index=oindex, chunk_size=chunk_size,
                                  chunk_step=chunk_step, n_time=x.shape[0],
                                  n_batch=x.shape[1])

    squared_error = T.sum((x2 - x)**2)
    grad_value = T.grad(squared_error, wrt=x).eval()
    assert_almost_equal(grad_value, 0)
def test_boolean_mask():
    """Exercise ``nn.utils.boolean_mask`` with 1-D, 2-D and symbolic masks."""
    # 1-D constant with a NumPy mask.
    vector = T.constant([0, 1, 2, 3], dtype=theano.config.floatX)
    vector_mask = np.array([True, False, True, False])
    result = nn.utils.boolean_mask(vector, vector_mask)
    utt.assert_allclose(result.eval(), (0, 2))

    # Nested Python list, masking whole rows.
    rows = [[1, 2], [3, 4], [5, 6]]
    row_mask = np.array([True, False, True])
    result = nn.utils.boolean_mask(rows, row_mask)
    utt.assert_allclose(result.eval(), [[1, 2], [5, 6]])

    # 3-D tensor with a symbolic mask over the first two axes.
    values = np.random.rand(3, 4, 2).astype(theano.config.floatX)
    symbolic = T.as_tensor(values)
    symbolic_mask = T.all(symbolic > .5, 2)
    result = nn.utils.boolean_mask(symbolic, symbolic_mask)
    utt.assert_allclose(result.eval(), values[np.all(values > .5, 2)])
def ix_(*args):
    """
    Theano np.ix_ analog

    Each 1-dimensional argument is reshaped so that its values lie along its
    own axis and every other axis has length 1. A ``None`` argument
    contributes ``slice(None)`` at its position instead.

    See numpy.lib.index_tricks.ix_ for reference

    Raises
    ------
    ValueError
        If a non-None argument is not 1-dimensional.
    """
    out = []
    nd = len(args)
    for k, new in enumerate(args):
        if new is None:
            out.append(slice(None))
            # Nothing to reshape for a full slice; without this the None
            # would fall through to `tt.as_tensor(None)`.
            continue
        new = tt.as_tensor(new)
        if new.ndim != 1:
            raise ValueError("Cross index must be 1 dimensional")
        new = new.reshape((1, ) * k + (new.size, ) + (1, ) * (nd - k - 1))
        out.append(new)
    return tuple(out)
def __init__(self, factor=numpy.sqrt(2), decay=1.0, min_factor=None,
             padding=False, **kwargs):
    """Set up a convolution followed by ``fmp`` with an epoch-decayed factor.

    The effective factor is ``max(factor * decay**epoch, min_factor)``
    (``min_factor`` defaults to ``factor``). When ``padding`` is falsy it is
    replaced by a symbolic condition on the source sizes; when that
    condition holds, the input is zero-padded to ``fixed_sizes`` before the
    convolution. Sets ``self.X``, ``self.output`` and ``self.output_sizes``.
    """
    super(ConvFMPLayer, self).__init__(**kwargs)
    if min_factor is None:
        min_factor = factor
    factor = T.maximum(factor * (decay**self.network.epoch),
                       numpy.float32(min_factor))
    sizes_raw = self.source.output_sizes

    # handle size problems
    if not padding:
        padding = T.min(self.source.output_sizes / factor) <= 0
        padding = theano.printing.Print(
            global_fn=maybe_print_pad_warning)(padding)

    fixed_sizes = T.maximum(
        sizes_raw,
        T.cast(
            T.as_tensor([
                factor + self.filter_height - 1,
                factor + self.filter_width - 1
            ]), 'float32'))
    sizes = ifelse(padding, fixed_sizes, sizes_raw)
    X_size = T.cast(T.max(sizes, axis=0), "int32")

    def pad_fn(x_t, s):
        # Zero canvas of the padded size with the original data copied into
        # its top-left corner. `numpy.float32(0)` replaces
        # `numpy.cast["float32"](0)`, which NumPy 2.0 removed.
        x = T.alloc(numpy.float32(0), X_size[0], X_size[1],
                    self.X.shape[3])
        x = T.set_subtensor(x[:s[0], :s[1]], x_t[:s[0], :s[1]])
        return x

    fixed_X, _ = theano.scan(
        pad_fn,
        [self.X.dimshuffle(2, 0, 1, 3), T.cast(sizes_raw, "int32")])
    fixed_X = fixed_X.dimshuffle(1, 2, 0, 3)
    self.X = ifelse(padding, T.unbroadcast(fixed_X, 3), self.X)

    conv_out = CuDNNConvHWBCOpValidInstance(self.X, self.W, self.b)
    conv_out_sizes = self.conv_output_size_from_input_size(sizes)
    self.output, self.output_sizes = fmp(conv_out, conv_out_sizes,
                                         T.cast(factor, 'float32'))
def test_choice_samples():
    """Run ``choice`` through ``rv_numpy_tester`` for several argument forms."""
    scalar_population = np.asarray(5)

    with raises(NotImplementedError):
        choice._shape_from_params(scalar_population)

    rv_numpy_tester(choice, np.asarray(5))
    rv_numpy_tester(choice, np.array([1.0, 5.0], dtype=config.floatX))
    rv_numpy_tester(choice, np.asarray(5), 3)

    # A 2-D population is rejected.
    with raises(ValueError):
        rv_numpy_tester(choice, np.array([[1, 2], [3, 4]]))

    rv_numpy_tester(choice, [1, 2, 3], 1)

    uniform_p = tt.as_tensor([1 / 3.0, 1 / 3.0, 1 / 3.0])
    rv_numpy_tester(choice, [1, 2, 3], 1, p=uniform_p)

    rv_numpy_tester(choice, [1, 2, 3], (10, 2), replace=True)

    symbolic_population = tt.as_tensor_variable([1, 2, 3])
    rv_numpy_tester(choice, symbolic_population, 2, replace=True)
def view(self, space, name, reshape=True):
    """Construct view on a variable from flattened `space`

    Parameters
    ----------
    space : matrix or vector
        space to take view of variable from
    name : `str`
        name of variable
    reshape : `bool`
        whether to reshape variable from vectorized view

    Returns
    -------
    (reshaped) slice of matrix, cast to the dtype recorded for `name`

    Raises
    ------
    ValueError
        If `space` has more than 2 dimensions.
    """
    is_symbolic = isinstance(space, tt.TensorVariable)
    entry = self._view[name]
    slc = entry.slc
    _, _, var_shape, dtype = entry

    if space.ndim == 2:
        view = space[:, slc]
    elif space.ndim < 2:
        view = space[slc]
    else:  # pragma: no cover
        raise ValueError('Space should have no more than 2 dims, got %d' %
                         space.ndim)

    if not reshape:
        return view.astype(dtype)

    # Target shape: leading dims of `space` followed by the variable shape.
    if len(var_shape) > 0:
        if is_symbolic:
            shape = tt.concatenate(
                [space.shape[:-1], tt.as_tensor(var_shape)])
        else:
            shape = np.concatenate(
                [space.shape[:-1], var_shape]).astype(int)
    else:
        shape = space.shape[:-1]

    if is_symbolic:
        # The symbolic reshape is given the output rank explicitly.
        view = view.reshape(shape, ndim=space.ndim + len(var_shape) - 1)
    else:
        view = view.reshape(shape)
    return view.astype(dtype)
def try_to_set_test_value(node_in, node_out, s):
    """Copy test values from input nodes to output nodes, repeated ``s`` times.

    For each (input, output) pair where the input has a test value and the
    size variable has one too, the input's test value gets a new leading
    axis repeated ``s`` times and is stored on the output. When ``s`` is
    None the size defaults to 1 and the leading axis is dropped again.
    """
    size_was_none = s is None
    if size_was_none:
        s = 1
    s = theano.compile.view_op(tt.as_tensor(s))

    # Accept single nodes as well as sequences of nodes.
    if not isinstance(node_in, (list, tuple)):
        node_in = [node_in]
    if not isinstance(node_out, (list, tuple)):
        node_out = [node_out]

    for source, target in zip(node_in, node_out):
        if not hasattr(source.tag, 'test_value'):
            continue
        if not hasattr(s.tag, 'test_value'):
            continue
        expanded = source.tag.test_value[None, ...]
        expanded = np.repeat(expanded, s.tag.test_value, 0)
        if size_was_none:
            expanded = expanded[0]
        target.tag.test_value = expanded
def shift_right(t, dist, axis, pad=0.0):
    """
    Return the signal shifted by dist along given axis, padded by `pad`.

    The first ``dist`` entries along ``axis`` are filled with ``pad`` and
    the last ``dist`` entries of ``t`` are dropped.
    """
    assert dist >= 0
    t = T.as_tensor(t)
    if dist == 0:
        return t

    def select(stop):
        # Full slices everywhere except `[:stop]` along `axis`.
        index = [slice(None)] * t.ndim
        index[axis] = slice(None, stop)
        return index

    filler = T.ones_like(t) * pad
    # A negative stop only works for dist > 0, hence the early return above.
    kept = t[select(-dist)]
    padding = filler[select(dist)]
    return T.concatenate((padding, kept), axis=axis)
def store_hds_old(paren_lst, algs):
    """Compute edit distances between every item and each algorithm's outputs.

    For each key of ``algs`` (in iteration order) a tensor is built whose
    row ``i`` holds ``editdistance.eval(alg_paren, paren_lst[i])`` for every
    ``alg_paren`` in ``algs[key]``.

    Returns
    -------
    list
        One tensor per key of ``algs``.
    """
    out = []
    for which in algs:
        alg = algs[which]
        # Direct iteration replaces `xrange(len(...))`, which does not exist
        # on Python 3. Each row is a fresh list, so no defensive copy is
        # needed.
        hds_list = [
            [editdistance.eval(alg_paren, paren) for alg_paren in alg]
            for paren in paren_lst
        ]
        out.append(copy.deepcopy(tt.as_tensor(hds_list)))
    return out
def my_pool_2d(input, ds, ignore_border=None, st=None, padding=(0, 0),
               mode='max'):
    """
    Pool an N-D tensor (N >= 2) over its last two dimensions with ``MyPool``.

    This function is a patch to the maxpool op of Theano: contrary to the
    current implementation of maxpool, the gradient is backpropagated to
    only one input of a given patch if several inputs have the same value.
    This is consistent with the CuDNN implementation (and therefore the op
    is replaced by the CuDNN version when possible).

    ``ignore_border`` is always treated as True; passing a falsy non-None
    value trips an assertion.
    """
    if input.ndim < 2:
        raise NotImplementedError('pool_2d requires a dimension >= 2')

    if ignore_border is not None:
        # check that ignore_border is True if provided
        assert ignore_border
    ignore_border = True

    pool = MyPool(ds, ignore_border, st=st, padding=padding, mode=mode)
    if input.ndim == 4:
        return pool(input)

    # Collapse all leading dimensions into a single batch axis and insert a
    # singleton axis: (batch_size, 1, height, width).
    leading = tensor.shape_padright(tensor.prod(input.shape[:-2]), 1)
    shape_4d = tensor.cast(
        tensor.join(0, leading, tensor.as_tensor([1]), input.shape[-2:]),
        'int64')
    pooled = pool(tensor.reshape(input, shape_4d, ndim=4))

    # Restore the original leading dimensions around the pooled image size.
    restored_shape = tensor.join(0, input.shape[:-2], pooled.shape[-2:])
    return tensor.reshape(pooled, restored_shape, ndim=input.ndim)
def extract_contexts_targets(self, indices_matrix, sentLengths, leftPad):
    """Gather target indices and their surrounding context indices.

    ``indices_matrix`` is zero-padded by ``self.window`` columns on both
    sides. For each of the ``self.batch_size`` rows, ``max(sentLengths)``
    target positions are selected starting at ``leftPad[row]`` (shifted by
    the padding), together with the positions ``window`` to the left and,
    unless ``self.only_left_context`` is set, ``window`` to the right.

    Returns ``(context_matrix, target_matrix)``, both cast to int64.
    """
    def shape2(rows, cols):
        # Symbolic 2-element int64 shape vector.
        return T.cast(
            T.join(0, T.as_tensor([rows]), T.as_tensor([cols])), 'int64')

    # Pad with zero indices on both sides.
    n_rows = indices_matrix.shape[0]
    left_padding = T.zeros((n_rows, self.window),
                           dtype=theano.config.floatX)
    right_padding = T.zeros((n_rows, self.window),
                            dtype=theano.config.floatX)
    matrix_padded = T.concatenate(
        [left_padding, indices_matrix, right_padding], axis=1)
    leftPad = leftPad + self.window  # a vector plus a number

    # Row (x) and column (y) coordinates of the targets.
    max_length = T.max(sentLengths)
    x = T.repeat(T.arange(self.batch_size), max_length)
    y_parts = [
        T.repeat((T.arange(leftPad[row], leftPad[row] + sentLengths[row]), ),
                 max_length, axis=0).flatten()[:max_length]
        for row in range(self.batch_size)
    ]
    y = T.concatenate(y_parts, axis=0)

    # Coordinates of the context entries.
    context_x = T.repeat(T.arange(self.batch_size),
                         max_length * self.context_size)
    # Left window first, farthest position first ...
    context_y = [y - i for i in range(self.window, 0, -1)]
    if not self.only_left_context:
        # ... then the right window, nearest position first.
        context_y.extend(y + i + 1 for i in range(self.window))
    context_y_list = T.concatenate(context_y, axis=0)

    # Regroup from offset-major to target-major order.
    context_y_vector = T.reshape(
        context_y_list,
        shape2(self.context_size, self.batch_size * max_length),
        ndim=2).transpose().flatten()

    context_matrix = T.reshape(
        matrix_padded[context_x, context_y_vector],
        shape2(self.batch_size, self.context_size * max_length),
        ndim=2)
    target_matrix = T.reshape(
        matrix_padded[x, y],
        shape2(self.batch_size, max_length),
        ndim=2)
    return T.cast(context_matrix, 'int64'), T.cast(target_matrix, 'int64')
def test_mvnormal_ShapeFeature():
    """Check shapes inferred by ``ShapeFeature`` for ``multivariate_normal``."""
    def shape_graph(rv):
        # FunctionGraph over the non-constant inputs of `rv`, with shape
        # inference enabled.
        graph_inputs = [
            i for i in tt_inputs([rv]) if not isinstance(i, tt.Constant)
        ]
        return FunctionGraph(
            graph_inputs,
            [rv],
            clone=False,
            features=[tt.opt.ShapeFeature()],
        )

    M_tt = tt.iscalar("M")
    M_tt.tag.test_value = 2

    d_rv = multivariate_normal(tt.ones((M_tt, )), tt.eye(M_tt), size=2)
    fg = shape_graph(d_rv)

    s1, s2 = fg.shape_feature.shape_of[d_rv]
    assert get_test_value(s1) == 2
    assert M_tt in tt_inputs([s2])

    # Test broadcasted shapes
    mean = tt.tensor(config.floatX, [True, False])
    mean.tag.test_value = np.array([[0, 1, 2]], dtype=config.floatX)

    base_covar = np.diag(np.array([1, 10, 100], dtype=config.floatX))
    test_covar = np.stack([base_covar, base_covar * 10.0])
    cov = tt.as_tensor(test_covar).type()
    cov.tag.test_value = test_covar

    d_rv = multivariate_normal(mean, cov, size=[2, 3])
    fg = shape_graph(d_rv)

    s1, s2, s3, s4 = fg.shape_feature.shape_of[d_rv]
    assert s1.get_test_value() == 2
    assert s2.get_test_value() == 3
    assert s3.get_test_value() == 2
    assert s4.get_test_value() == 3
def __init__(self, z0=None, dim=None, jitter=.001, batch_size=None,
             local=False):
    """Initialise the flow from a parent flow or a starting tensor.

    When ``z0`` is an ``AbstractFlow`` it becomes the parent, and ``dim``
    and the starting tensor are taken from it (``parent.dim``,
    ``parent.forward``). Otherwise ``dim`` must be given; ``z0`` is
    converted to a tensor, or a fresh matrix is created when it is None.

    Raises
    ------
    ValueError
        If ``dim`` is neither supplied nor obtained from a parent flow.
    """
    self.local = local
    self.batch_size = batch_size
    self.__jitter = jitter

    parent = None
    if isinstance(z0, AbstractFlow):
        parent = z0
        dim = parent.dim
        z0 = parent.forward

    if dim is None:
        raise ValueError('Cannot infer dimension of flow, '
                         'please provide dim or Flow instance as z0')
    self.dim = dim

    if z0 is None:
        self.z0 = tt.matrix()  # type: tt.TensorVariable
    else:
        self.z0 = tt.as_tensor(z0)
    self.parent = parent
def tt_logsumexp(x, axis=None, keepdims=False):
    """Compute ``log(sum(exp(x)))`` along ``axis`` with the maximum factored out.

    The maximum along ``axis`` is subtracted before exponentiating and added
    back afterwards; infinite maxima are replaced by 0 first.
    """
    shift = tt.max(x, axis=axis, keepdims=True)
    if shift.ndim > 0:
        shift = tt.set_subtensor(shift[tt.isinf(shift).squeeze()], 0.0)
    elif tt.isinf(shift):
        shift = tt.as_tensor(0.0)

    res = tt.log(tt.sum(tt.exp(x - shift), axis=axis, keepdims=keepdims))

    if not keepdims:
        # SciPy uses the `axis` keyword here, but Theano doesn't support that.
        # shift = tt.squeeze(shift, axis=axis)
        axis = np.atleast_1d(axis) if axis is not None else range(shift.ndim)
        # Drop the broadcastable dimensions that were reduced over.
        kept_dims = [
            i for i in range(shift.ndim)
            if not shift.broadcastable[i] or i not in axis
        ]
        shift = shift.dimshuffle(kept_dims)

    return res + shift
def run_null_model(nulls, iter_count=2000, tune_iters=2000):
    """Fit a Normal/Laplace mixture to ``nulls`` and draw posterior predictives.

    Returns a dict with the sampled ``'trace'`` and the posterior predictive
    samples ``'ppc'``.
    """
    with pm.Model() as model:
        sd_null = pm.Gamma('sd_null', alpha=.1, beta=1.)
        b_null = pm.Gamma('b_null', alpha=1., beta=.1)
        dispersed_prob = pm.Beta('dispersed_prob', alpha=1., beta=1.)

        # Two zero-centred components; `dispersed_prob` is the weight of
        # the Laplace one.
        components = [
            pm.Normal.dist(mu=0., sd=sd_null),
            pm.Laplace.dist(mu=0., b=b_null),
        ]
        weights = tt.as_tensor([1. - dispersed_prob, dispersed_prob])
        pm.Mixture('null', comp_dists=components, w=weights, observed=nulls)
        pm.Deterministic('log_prob', model.logpt)

        trace = pm.sample(iter_count, tune=tune_iters, chains=4)
        ppc = pm.sample_ppc(trace, samples=iter_count, model=model)

    return {'trace': trace, 'ppc': ppc}
def get_aggregator(self):
    """Build an ``Aggregator`` that accumulates ``self.expression`` by summing.

    Initialization zeroes the accumulator and clears the ``initialized``
    flag; each accumulation step adds the expression to the accumulator (or
    to zeros while the flag is unset) and sets the flag.
    """
    initialized = shared_like(0.)
    accumulator = shared_like(self.expression)

    # Stand-in for the previous value, with the same shape as the new
    # result, used until the first accumulation has happened.
    zeros = tensor.as_tensor(self.expression).zeros_like()
    previous = ifelse(initialized, accumulator, zeros)
    accumulated = self.expression + previous

    initialization_updates = [
        (accumulator, tensor.zeros_like(accumulator)),
        (initialized, 0.),
    ]
    accumulation_updates = [
        (accumulator, accumulated),
        (initialized, 1.),
    ]
    return Aggregator(aggregation_scheme=self,
                      initialization_updates=initialization_updates,
                      accumulation_updates=accumulation_updates,
                      readout_variable=(accumulator))
def conv_forward_prop_test(sentences):
    """Forward pass through the three convolution layers for evaluation.

    When ``self.dropout`` is set the filters are scaled (0.8 for the first
    layer, 0.5 for the other two) instead of sampling dropout masks.
    """
    def scaled_filters(layer, keep):
        # Filters multiplied by `keep` when dropout is on, unchanged otherwise.
        return T.switch(self.dropout, self.filters[layer] * keep,
                        self.filters[layer])

    k = calculate_k(sentences.shape[-1], 1)
    first_layer = create_convolution_layer(
        sentences, scaled_filters(0, 0.8), self.folding_layers[0], k,
        self.biases[0])

    k = calculate_k(sentences.shape[-1], 2)
    second_layer = create_convolution_layer(
        first_layer, scaled_filters(1, 0.5), self.folding_layers[1], k,
        self.biases[1])

    # The last layer uses the fixed top-level k.
    k = T.as_tensor(self.k_top)
    third_layer = create_convolution_layer(
        second_layer, scaled_filters(2, 0.5), self.folding_layers[2], k,
        self.biases[2])
    return third_layer
def conv_forward_prop_train(sentences):
    """Forward pass through the three convolution layers for training.

    When ``self.dropout`` is set, a binomial mask multiplies the input
    (``p=0.8``) and the output of every layer (the sampler's default ``p``).
    """
    def maybe_drop(values, **binomial_kwargs):
        # Masked values when dropout is on, unchanged values otherwise.
        mask = theano_rand.binomial(values.shape,
                                    dtype=theano.config.floatX,
                                    **binomial_kwargs)
        return T.switch(self.dropout, mask * values, values)

    k = calculate_k(sentences.shape[-1], 1)
    sentences = maybe_drop(sentences, p=0.8)
    first_layer = create_convolution_layer(sentences, self.filters[0],
                                           self.folding_layers[0], k,
                                           self.biases[0])

    k = calculate_k(sentences.shape[-1], 2)
    first_layer = maybe_drop(first_layer)
    second_layer = create_convolution_layer(first_layer, self.filters[1],
                                            self.folding_layers[1], k,
                                            self.biases[1])

    # The last layer uses the fixed top-level k.
    k = T.as_tensor(self.k_top)
    second_layer = maybe_drop(second_layer)
    third_layer = create_convolution_layer(second_layer, self.filters[2],
                                           self.folding_layers[2], k,
                                           self.biases[2])
    return maybe_drop(third_layer)
def initial(self, size, no_rand=False, l=None):
    """Initial distribution for constructing posterior

    Parameters
    ----------
    size : `int`
        number of samples; when None the result is a single vector
    no_rand : `bool` or symbolic variable
        when true, return a constant tensor filled with
        `self.initial_dist_map` instead of a random sample
    l : `int`
        length of sample, defaults to `self.total_size`

    Returns
    -------
    `tt.TensorVariable`
        sampled (or constant) latent space
    """
    condition_is_symbolic = isinstance(no_rand, tt.Variable)
    if l is None:  # pragma: no cover
        l = self.total_size
    dims = (l, ) if size is None else (size, l)
    shape = tt.stack(*dims)

    if not condition_is_symbolic:
        # The flag is known while building the graph, so only the needed
        # branch is constructed.
        if no_rand:
            return tt.ones(shape) * self.initial_dist_map
        return getattr(self._rng, self.initial_dist_name)(shape)

    # Symbolic flag: build both alternatives and select at run time.
    no_rand = tt.as_tensor(no_rand)
    sample = getattr(self._rng, self.initial_dist_name)(shape)
    return tt.switch(
        no_rand,
        tt.ones_like(sample) * self.initial_dist_map,
        sample
    )