def _span_sums(stt, end, p_lens, max_p_len, batch_size, dim, max_ans_len):
    """Add each start vector to the ``max_ans_len`` end vectors that follow it.

    Shapes, as annotated by the original author:
        stt     (max_p_len, batch_size, dim)
        end     (max_p_len, batch_size, dim)
        p_lens  (batch_size,)

    Returns a pair ``(sums, masks)``:
        sums    (batch_size, max_p_len*max_ans_len, dim)
        masks   (batch_size, max_p_len*max_ans_len), passed through
                ``cast_floatX``; an entry is set where the span's end index
                is strictly below the matching ``p_lens`` value.
    """
    # Grid of end indices: row i lists i, i+1, ..., i+max_ans_len-1.
    start_col = tt.shape_padright(tt.arange(max_p_len))        # (max_p_len, 1)
    answer_row = tt.shape_padleft(tt.arange(max_ans_len))      # (1, max_ans_len)
    flat_end_idxs = (answer_row + start_col).flatten()         # (max_p_len*max_ans_len,)

    # Zero-pad the end vectors so indices running past the passage stay valid.
    tail_zeros = tt.zeros((max_ans_len-1, batch_size, dim))
    padded_end = tt.concatenate([end, tail_zeros], axis=0)     # (max_p_len+max_ans_len-1, batch_size, dim)
    gathered_end = padded_end[flat_end_idxs].reshape(
        (max_p_len, max_ans_len, batch_size, dim))

    # Broadcast every start vector over its candidate ends and sum.
    sums = stt.dimshuffle((0, 'x', 1, 2)) + gathered_end       # (max_p_len, max_ans_len, batch_size, dim)
    sums_by_batch = sums.dimshuffle((2, 0, 1, 3)).reshape(
        (batch_size, max_p_len*max_ans_len, dim))

    # Mask out spans whose end index is not below the passage length.
    lens_col = tt.shape_padright(p_lens)                       # (batch_size, 1)
    idxs_row = tt.shape_padleft(flat_end_idxs)                 # (1, max_p_len*max_ans_len)
    masks_by_batch = cast_floatX(tt.lt(idxs_row, lens_col))    # (batch_size, max_p_len*max_ans_len)

    return sums_by_batch, masks_by_batch
def create_prediction(self):
    """Build the symbolic multi-step prediction and return ``self.results``.

    The model is stepped once on an initial window, six more times on
    observed ``gfs`` / ``pm25in`` columns, and then, when ``self.steps > 1``,
    repeatedly with the latest prediction fed back in as the newest
    ``pm25in`` column. After every step the last element of the returned
    layer status is appended to ``self.results`` along axis 1.

    Side effects: overwrites ``self.layerstatus`` and ``self.results``.
    """
    gfs = self.gfs
    pm25in = self.pm25in

    # Initial forward pass on the first window.
    gfs_x = T.concatenate([gfs[:, 0], gfs[:, 1], gfs[:, 2]], axis=1)
    pm25in_x = T.concatenate([pm25in[:, 0], pm25in[:, 1]], axis=1)
    self.layerstatus = self.model.forward(
        T.concatenate([gfs_x, pm25in_x, self.cnt[:, :, 0]], axis=1))
    self.results = self.layerstatus[-1]

    # Original note: the first 6 passes (0-5) process the 6 frames that
    # precede the output; the 7th pass is the first real output.
    # `range` replaces the Python-2-only `xrange`; iteration is unchanged.
    # NOTE(review): the collapsed source does not show whether the
    # `self.results` concatenation sits inside this loop; it is kept inside,
    # matching the later steps - confirm against the original layout.
    for i in range(1, 7):
        # Slide both windows: drop the oldest columns, append the next ones.
        gfs_x = T.concatenate([gfs_x[:, 9:], gfs[:, i + 2]], axis=1)
        pm25in_x = T.concatenate([pm25in_x[:, 1:], pm25in[:, i + 1]], axis=1)
        self.layerstatus = self.model.forward(
            T.concatenate([gfs_x, pm25in_x, self.cnt[:, :, i]], axis=1),
            self.layerstatus)
        self.results = T.concatenate(
            [self.results, self.layerstatus[-1]], axis=1)

    if self.steps > 1:
        # From here on the newest pm25in column is the model's own last output.
        gfs_x = T.concatenate([gfs_x[:, 9:], gfs[:, 9]], axis=1)
        pm25in_x = T.concatenate(
            [pm25in_x[:, 1:], T.shape_padright(self.results[:, -1])], axis=1)
        self.layerstatus = self.model.forward(
            T.concatenate([gfs_x, pm25in_x, self.cnt[:, :, 7]], axis=1),
            self.layerstatus)
        self.results = T.concatenate(
            [self.results, self.layerstatus[-1]], axis=1)

        # Remaining steps - 2 forward passes.
        for i in range(2, self.steps):
            gfs_x = T.concatenate([gfs_x[:, 9:], gfs[:, i + 8]], axis=1)
            pm25in_x = T.concatenate(
                [pm25in_x[:, 1:], T.shape_padright(self.results[:, -1])],
                axis=1)
            self.layerstatus = self.model.forward(
                T.concatenate([gfs_x, pm25in_x, self.cnt[:, :, i + 6]], axis=1),
                self.layerstatus)
            # Original author's open question: "need T.shape_padright???"
            self.results = T.concatenate(
                [self.results, self.layerstatus[-1]], axis=1)

    return self.results
def maxpool_3D(input, ds, ignore_border=False):
    """Max-pool the three trailing axes of ``input`` with two 2-D passes.

    The last two axes are pooled first with factors ``(ds[1], ds[2])``.
    The third-from-last axis is then moved to the back and pooled with
    ``(1, ds[0])``, after which the original axis order is restored.

    :param input: symbolic tensor with at least 3 dimensions.
    :param ds: length-3 sequence of pooling factors; ``ds[0]`` applies to
        the third-from-last axis, ``ds[1]`` and ``ds[2]`` to the last two.
    :param ignore_border: forwarded to ``DownsampleFactorMax``.
    :raises NotImplementedError: if ``input.ndim < 3``.
    """
    if input.ndim < 3:
        raise NotImplementedError('max_pool_3d requires a dimension >= 3')

    n_axes = input.ndim
    leading_axes = list(range(n_axes - 3))

    def _pool_trailing_pair(tensor_nd, factors):
        # DownsampleFactorMax is applied to a 4-D tensor here, so all leading
        # axes are folded into one batch axis with a dummy channel axis of
        # size 1; the result is unfolded again afterwards.
        lead_shape = tensor_nd.shape[:-2]
        folded_batch = T.shape_padright(T.prod(lead_shape), 1)
        shape_4d = T.cast(
            T.join(0, folded_batch, T.as_tensor([1,]), tensor_nd.shape[-2:]),
            'int32')
        pooled = DownsampleFactorMax(factors, ignore_border)(
            T.reshape(tensor_nd, shape_4d, ndim=4))
        unfolded_shape = T.join(0, lead_shape, pooled.shape[-2:])
        return T.reshape(pooled, unfolded_shape, ndim=n_axes)

    # Pass 1: pool the last two axes of every frame.
    frames_pooled = _pool_trailing_pair(input, (ds[1], ds[2]))

    # Pass 2: rotate the third-from-last axis to the back and pool it there.
    to_back = leading_axes + [n_axes - 2, n_axes - 1, n_axes - 3]
    fully_pooled = _pool_trailing_pair(
        frames_pooled.dimshuffle(to_back), (1, ds[0]))

    # Undo the rotation so the axes come back in their original order.
    restore = leading_axes + [n_axes - 1, n_axes - 3, n_axes - 2]
    return fully_pooled.dimshuffle(restore)
def roc_curves(y_true, y_predicted):
    """Return ``(fpr, tpr, thresholds)`` computed along the last axis.

    The false/true positive counts come from ``_binary_clf_curves``; each is
    cast to float32 and divided by its own entry at the index returned by
    ``_last_col_idx(y_true.ndim)``.
    """
    fps, tps, thresholds = _binary_clf_curves(y_true, y_predicted)
    last_col = _last_col_idx(y_true.ndim)

    def _as_rate(counts):
        # Normalise the counts by the value selected with ``last_col``.
        return counts.astype('float32') / T.shape_padright(counts[last_col], 1)

    return _as_rate(fps), _as_rate(tps), thresholds
def __init__(self, n, p, *args, **kwargs):
    """Set up ``n``, ``p``, ``mean`` and ``mode`` for the distribution.

    ``p`` is renormalised to sum to one along its last axis and ``n`` is
    squeezed. Depending on ``self.shape`` and the rank of ``n``, both are
    then padded so that ``self.n * self.p`` broadcasts.

    ``self.mode`` is the rounded mean, with the difference between ``n`` and
    the sum of the rounded values added to the entries where that difference
    is non-zero.
    """
    super(Multinomial, self).__init__(*args, **kwargs)

    p = p / tt.sum(p, axis=-1, keepdims=True)
    n = np.squeeze(n)  # works also if n is a tensor

    if len(self.shape) > 1:
        m = self.shape[-2]
        # The original used `assert` inside `try` for this decision, which is
        # stripped under `python -O` and would then skip the broadcast. An
        # explicit comparison behaves the same with and without -O.
        try:
            shape_matches = n.shape == (m,)
        except AttributeError:
            shape_matches = False
        if not shape_matches:
            n = n * tt.ones(m)
        self.n = tt.shape_padright(n)
        self.p = p if p.ndim > 1 else tt.shape_padleft(p)
    elif n.ndim == 1:
        self.n = tt.shape_padright(n)
        self.p = p if p.ndim > 1 else tt.shape_padleft(p)
    else:
        # n is a scalar, p is a 1d array
        self.n = tt.as_tensor_variable(n)
        self.p = tt.as_tensor_variable(p)

    self.mean = self.n * self.p

    # Rounding the mean may not sum to n; add the shortfall back where it is
    # non-zero.
    mode = tt.cast(tt.round(self.mean), 'int32')
    diff = self.n - tt.sum(mode, axis=-1, keepdims=True)
    inc_bool_arr = tt.abs_(diff) > 0
    mode = tt.inc_subtensor(mode[inc_bool_arr.nonzero()],
                            diff[inc_bool_arr.nonzero()])
    self.mode = mode
def getTheanoSimilarityFunction():
    """Compile a Theano function scoring patches against filters.

    The compiled function takes ``(patches, filters, globalMean, globalStd)``,
    all float32:

    * ``patches``    - 3-D tensor, A x B x (patchsize**2)
    * ``filters``    - matrix, C x (patchsize**2)
    * ``globalMean`` - vector
    * ``globalStd``  - vector

    Each patch is standardised with its own mean and standard deviation
    (plus 1e-6), then with the global statistics. The result is the dot
    product of every standardised patch with every filter, divided by the
    L2 norm of the standardised patch (A x B x C).
    """
    # Define the input variables to the function
    patches = T.tensor3(dtype='float32')  # AxBx(patchsize**2)
    filters = T.matrix(dtype='float32')  # Cx(patchsize**2)
    globalMean = T.vector(dtype='float32')
    globalStd = T.vector(dtype='float32')

    # Perform canonical processing of the patches.
    # The original also built `std` from `0.1 * patches.std()` and then
    # overwrote it on the next line; that dead assignment has been removed,
    # so the compiled graph is unchanged.
    mean = T.shape_padright(patches.mean(2), n_ones=1)
    std = T.shape_padright(patches.std(2) + 1e-6, n_ones=1)
    canonicalPatches_ = (patches - mean) / std
    canonicalPatches = (canonicalPatches_ - globalMean) / globalStd

    # Compute the similarities between each patch and each filter
    similarities = T.tensordot(canonicalPatches, filters,
                               axes=[[2], [1]])  # AxBxC

    # Normalize the similarities by the norm of the patches
    normFactor = ((canonicalPatches ** 2).sum(2) ** 0.5)
    normFactorPadded = T.shape_padright(normFactor, n_ones=1)
    similaritiesNorm = (similarities / normFactorPadded)

    # Compile and return the theano function
    return theano.function([patches, filters, globalMean, globalStd],
                           similaritiesNorm, on_unused_input='ignore')
def sym_mask_logdensity_estimator_intermediate(self, x, mask):
    """Build the masked log-density expression and its intermediates.

    ``x`` and ``mask`` are transposed on entry (annotated BxD afterwards).
    The masked input and the mask go through the hidden layers. The last
    hidden activation is projected to mixture logits, means and log-scales,
    and a Gaussian mixture log-density is evaluated on the unmasked
    dimensions and rescaled by ``D / (D - d)``.

    Returns ``(logdensity, z_alpha, z_mu, z_sigma, Alpha, Mu, Sigma, h)``.

    Raises ``AssertionError`` if the configured nonlinearity is neither
    ``"sigmoid"`` nor ``"RLU"``.
    """
    non_linearity_name = self.parameters["nonlinearity"].get_name()
    assert non_linearity_name == "sigmoid" or non_linearity_name == "RLU"

    x = x.T  # BxD
    mask = mask.T  # BxD
    output_mask = constantX(1) - mask  # BxD
    D = constantX(self.n_visible)
    # Original author's note: d is the 1-based index of the dimension whose
    # value to infer (not the size of the context).
    d = mask.sum(1)
    masked_input = x * mask  # BxD

    h = self.nonlinearity(
        T.dot(masked_input, self.W1) + T.dot(mask, self.Wflags) + self.b1
    )  # BxH
    # `range` replaces the Python-2-only `xrange`; iteration is unchanged.
    for l in range(self.n_layers - 1):
        h = self.nonlinearity(T.dot(h, self.Ws[l]) + self.bs[l])  # BxH

    z_alpha = T.tensordot(h, self.V_alpha, [[1], [1]]) + T.shape_padleft(self.b_alpha)
    z_mu = T.tensordot(h, self.V_mu, [[1], [1]]) + T.shape_padleft(self.b_mu)
    z_sigma = T.tensordot(h, self.V_sigma, [[1], [1]]) + T.shape_padleft(self.b_sigma)

    # Normalise the exponentiated logits over the component axis.
    temp = T.exp(z_alpha)
    Alpha = temp / T.shape_padright(temp.sum(2))  # BxDxC
    Mu = z_mu  # BxDxC
    Sigma = T.exp(z_sigma)  # BxDxC

    # Per-component Gaussian log-density of x.
    Phi = (
        -constantX(0.5) * T.sqr((Mu - T.shape_padright(x)) / Sigma)
        - T.log(Sigma)
        - constantX(0.5 * np.log(2 * np.pi))
    )  # BxDxC

    logdensity = (
        log_sum_exp(Phi + T.log(Alpha), axis=2) * output_mask
    ).sum(1) * D / (D - d)
    return (logdensity, z_alpha, z_mu, z_sigma, Alpha, Mu, Sigma, h)
def create_prediction(self):
    """Build the symbolic multi-step prediction and return ``self.results``.

    The first step feeds three ``gfs`` columns, two ``pm25in`` columns and
    ``cnt[:, :, 0]`` to ``self.model.forward``. Later steps replace the
    ``pm25in`` columns with earlier predictions. After every step the last
    element of the layer status is appended to ``self.results`` on axis 1.

    Side effects: overwrites ``self.layerstatus`` and ``self.results``.
    """
    gfs = self.gfs
    pm25in = self.pm25in

    # Initial forward pass.
    first_input = T.concatenate(
        [gfs[:, 0], gfs[:, 1], gfs[:, 2],
         pm25in[:, 0], pm25in[:, 1],
         self.cnt[:, :, 0]],
        axis=1,
    )
    self.layerstatus = self.model.forward(first_input)
    # Original author's note: "results.shape? 40*1".
    self.results = self.layerstatus[-1]

    if self.steps > 1:
        # Second step: one observed pm25in column plus the first prediction.
        second_input = T.concatenate(
            [gfs[:, 1], gfs[:, 2], gfs[:, 3],
             pm25in[:, 1], self.results,
             self.cnt[:, :, 1]],
            axis=1,
        )
        self.layerstatus = self.model.forward(second_input, self.layerstatus)
        self.results = T.concatenate(
            [self.results, self.layerstatus[-1]], axis=1)

        # Remaining steps - 2 passes use only the model's own predictions.
        # `range` replaces the Python-2-only `xrange`; iteration is unchanged.
        for i in range(2, self.steps):
            step_input = T.concatenate(
                [
                    gfs[:, i],
                    gfs[:, i + 1],
                    gfs[:, i + 2],
                    T.shape_padright(self.results[:, i - 2]),
                    T.shape_padright(self.results[:, i - 1]),
                    self.cnt[:, :, i],
                ],
                axis=1,
            )
            self.layerstatus = self.model.forward(step_input, self.layerstatus)
            # Original author's open question: "need T.shape_padright???"
            self.results = T.concatenate(
                [self.results, self.layerstatus[-1]], axis=1)

    return self.results
def _warp_times(self, t):
    """Return ``t`` (padded on the right) plus a per-entry offset from ``self.ttvs``.

    For every entry of ``self.ttvs`` an index is computed as
    ``floor(t / period + _base_time)`` and clipped to the valid range of
    that entry. The selected offsets are stacked along a new last axis.
    """
    phase = tt.shape_padleft(t) / tt.shape_padright(self.period, t.ndim)
    phase += tt.shape_padright(self._base_time, t.ndim)
    ind = tt.cast(tt.floor(phase), "int64")

    offsets = []
    for i, ttv in enumerate(self.ttvs):
        # Clip so that indices outside the table reuse the first/last entry.
        safe_index = tt.clip(ind[i], 0, ttv.shape[0]-1)
        offsets.append(ttv[safe_index])
    dt = tt.stack(offsets, -1)

    return tt.shape_padright(t) + dt
def prediction(self, h, bias):
    """Sample one ``(x, y, pin)`` row per input row from the mixture.

    A component is drawn per row from ``prop``, and a correlated pair is
    built from two standard normal draws using that component's means,
    standard deviations and correlation. The third column is the result of
    ``bernoulli > uniform`` cast to ``floatX``.

    The random stream is created with a fixed seed (42).
    """
    srng = RandomStreams(seed=42)
    (prop, mean_x, mean_y,
     std_x, std_y, rho, bernoulli) = self.compute_parameters(h, bias)

    # Choose one mixture component per row.
    mode = T.argmax(srng.multinomial(pvals=prop, dtype=prop.dtype), axis=1)
    v = T.arange(0, mean_x.shape[0])

    def _chosen(param):
        # Pick, for every row, the value belonging to the sampled component.
        return param[v, mode]

    m_x = _chosen(mean_x)
    m_y = _chosen(mean_y)
    s_x = _chosen(std_x)
    s_y = _chosen(std_y)
    r = _chosen(rho)

    # Two independent standard normal draws per row.
    normal = srng.normal((h.shape[0], 2))
    x = normal[:, 0]
    y = normal[:, 1]

    # Correlate the second coordinate with the first through r.
    x_n = T.shape_padright(m_x + s_x * x)
    y_n = T.shape_padright(m_y + s_y * (x * r + y * T.sqrt(1.-r**2)))

    uniform = srng.uniform((h.shape[0],))
    pin = T.shape_padright(T.cast(bernoulli > uniform, floatX))

    return T.concatenate([x_n, y_n, pin], axis=1)
def filter_spike_train(n, S, taus):
    """Convolve column ``n`` of ``S`` with the filter built from ``taus[n]``.

    Both operands are padded with one trailing axis and convolved with
    ``border_mode='full'``; the result is indexed with ``[0, :, 0]``.
    """
    kernel = T.shape_padright(filt_fn(taus[n]), n_ones=1)
    spikes = T.shape_padright(S[:, n], n_ones=1)
    filtered_S = conv2d(spikes, kernel, border_mode='full')
    return filtered_S[0, :, 0]
def __call__(self, crf, X, Y, mask=None, flank=0):
    """Decode, then return the loss and confusion terms with flanks trimmed.

    When ``mask`` is given, both terms are multiplied by it (padded on the
    right once for the loss, twice for the confusion). ``flank`` entries
    are dropped from each end of the first axis of both results.
    """
    Yh = self.decode(crf, X, Y)
    L = self.loss(Yh, Y)
    C = confusion(T.argmax(Yh, axis=-1), Y, Yh.shape[-1])

    if mask is not None:
        mask_padded = T.shape_padright(mask)
        L *= mask_padded
        C *= T.shape_padright(mask_padded)

    n = Yh.shape[0]
    keep = slice(flank, n-flank)
    return L[keep], C[keep]
def dfe_dlhat(self, g_hat, h_hat, l_hat, v):
    """Return the sum of a loss term and an entropy term, both built from ``l_hat``.

    ``g_hat`` and ``v`` are accepted but not used here.
    """
    # Term from the loss function.
    dloss_dl = self.label_multiplier * (T.dot(h_hat, self.Whl) + self.lbias)
    loss_row_sum = T.shape_padright(T.sum(l_hat * dloss_dl, axis=1))
    rval = dloss_dl * l_hat - l_hat * loss_row_sum

    # Term from the entropy.
    xlogx_l = T.xlogx.xlogx(l_hat)
    entropy_row_sum = T.shape_padright(T.sum(xlogx_l + l_hat, axis=1))
    dentropy = - xlogx_l - l_hat + l_hat * entropy_row_sum

    return rval + dentropy
def _theano_confusion(self, Yh, Y, mask):
    """Build a masked one-hot confusion tensor.

    ``Yh`` is reduced with argmax over its last axis. A zero int64 tensor
    with two extra axes of size ``self.n_out`` is allocated, a 1 is written
    at ``[i, j, Y, argmax(Yh)]`` for every grid position ``(i, j)``, and
    the result is multiplied by ``mask`` padded with two trailing axes.
    """
    predicted = T.argmax(Yh, axis=-1)

    counts_shape = list(predicted.shape) + [self.n_out, self.n_out]
    counts = T.zeros(counts_shape, dtype='int64')

    # One (row, col) index pair for every position of the leading two axes.
    rows, cols = T.mgrid[0:counts.shape[0], 0:counts.shape[1]]
    counts = T.set_subtensor(counts[rows, cols, Y, predicted], 1)

    mask_4d = T.shape_padright(T.shape_padright(mask))
    return counts*mask_4d
def density_given_previous_a_and_x(x, w, V_alpha, b_alpha, V_mu, b_mu, V_sigma, b_sigma, activations_factor, p_prev, a_prev, x_prev):
    """One step of the recurrence: update activations and accumulate log-density.

    The activation ``a`` is advanced by the outer product of ``x_prev`` and
    ``w``. The log of a Laplace mixture evaluated at ``x`` is then added to
    ``p_prev``.

    Returns ``(p, a, x)``. Uses ``self`` from the enclosing scope.
    """
    a = a_prev + T.dot(T.shape_padright(x_prev, 1), T.shape_padleft(w, 1))
    h = self.nonlinearity(a * activations_factor)  # BxH

    # Mixture parameters predicted from the hidden state.
    Alpha = T.nnet.softmax(T.dot(h, V_alpha) + T.shape_padleft(b_alpha))  # BxC
    Mu = T.dot(h, V_mu) + T.shape_padleft(b_mu)  # BxC
    Sigma = T.exp((T.dot(h, V_sigma) + T.shape_padleft(b_sigma)))  # BxC

    # Weighted per-component Laplace log-density.
    log_terms = (T.log(Alpha) - T.log(2 * Sigma)
                 - T.abs_(Mu - T.shape_padright(x, 1)) / Sigma)
    p = p_prev + log_sum_exp(log_terms)
    return (p, a, x)
def loss(self, X, mask=None, flank=0, Z=None):
    """Return cross-entropy and confusion terms for reconstructing ``X``.

    When ``Z`` is not supplied it is computed as
    ``self.transform(self.noise(X), mask=mask)``. When ``mask`` is given,
    both terms are multiplied by it (padded on the right once for the
    loss, twice for the confusion). ``flank`` entries are dropped from each
    end of the first axis of both results.
    """
    if Z is None:
        Z = self.transform(self.noise(X), mask=mask)

    E = self.emit(Z)
    L = cross_entropy(E, X)
    C = confusion(T.argmax(E, axis=-1), X, E.shape[-1])

    if mask is not None:
        mask_padded = T.shape_padright(mask)
        L *= mask_padded
        C *= T.shape_padright(mask_padded)

    n = X.shape[0]
    keep = slice(flank, n-flank)
    return L[keep], C[keep]
def max_pool_3d(input, ds, ignore_border=False):
    """Max-pool an N-D tensor (N >= 4) over its last two axes and axis ``-4``.

    The input is downscaled by keeping only the maximum of non-overlapping
    patches. The last two axes are pooled with factors ``(ds[1], ds[2])``.
    The axis at position ``ndim - 4`` is then moved to the back, pooled with
    factor ``ds[0]``, and moved back to its original position. The axis at
    ``ndim - 3`` is never pooled.

    :type input: N-D theano tensor of input images.
    :param input: input images; must have at least 4 dimensions.
    :type ds: tuple of length 3
    :param ds: factor by which to downscale. ``ds[0]`` applies to axis
        ``ndim - 4``, ``ds[1]`` and ``ds[2]`` to the last two axes.
    :param ignore_border: boolean value, forwarded to
        ``DownsampleFactorMax``. When True, a length-5 axis pooled by 2
        gives length 2; length 3 otherwise.
    :raises NotImplementedError: if ``input.ndim < 4``. The previous guard
        let 3-D inputs through, but the shuffle patterns below are built
        from ``ndim - 4`` and are invalid for a 3-D tensor.
    """
    if input.ndim < 4:
        raise NotImplementedError('max_pool_3d requires a dimension >= 4')

    vid_dim = input.ndim

    def _pool_last_two(tensor_nd, factors):
        # Fold all leading axes into a single batch axis with a dummy channel
        # axis of size 1, pool the resulting 4-D tensor, and unfold again.
        lead_shape = tensor_nd.shape[:-2]
        batch_size = T.shape_padright(T.prod(lead_shape), 1)
        new_shape = T.cast(
            T.join(0, batch_size, T.as_tensor([1,]), tensor_nd.shape[-2:]),
            'int32')
        pooled = DownsampleFactorMax(factors, ignore_border)(
            T.reshape(tensor_nd, new_shape, ndim=4))
        outshape = T.join(0, lead_shape, pooled.shape[-2:])
        return T.reshape(pooled, outshape, ndim=vid_dim)

    # Maxpool frame: downsample rows and cols.
    out = _pool_last_two(input, (ds[1], ds[2]))

    # Maxpool time: move axis ndim-4 to the back so it can be pooled as the
    # last axis of a 2-D pooling call.
    shufl = (list(range(vid_dim-4)) + list(range(vid_dim-3, vid_dim))
             + [vid_dim-4])
    outtime = _pool_last_two(out.dimshuffle(shufl), (1, ds[0]))

    # Put the pooled axis back where it came from.
    shufl = (list(range(vid_dim-4)) + [vid_dim-1]
             + list(range(vid_dim-4, vid_dim-1)))
    return outtime.dimshuffle(shufl)
def apply(self, input_, application_call):
    """Add noise with a learned scale to ``input_`` during training.

    Outside training mode the input is returned untouched. In training mode
    a log noise level is derived from ``self.mask`` (clipped to
    ``[-16, 16]``) and tagged via ``copy_and_tag_noise``. A KL-style penalty
    is registered on ``application_call`` as the auxiliary variable
    ``'nits'``, and the input plus scaled noise is returned.

    Parameters
    ----------
    input_ : :class:`~tensor.TensorVariable`
        The input on which to apply the transformations
    application_call
        Object on which the ``'nits'`` auxiliary variable is registered.

    Returns
    -------
    output : :class:`~tensor.TensorVariable`
        ``input_`` itself when not training, otherwise
        ``input_ + noise_rate * exp(noise_level) * noise``.

    """
    # When not in training mode, turn off noise
    if not self._training_mode:
        return input_

    if self.tied_sigma:
        # One noise level per (batch, channel): flatten, then re-add two
        # trailing axes that are marked broadcastable below.
        average = tensor.shape_padright(self.flatten.apply(input_), 2)
        noise_level = (self.prior_noise_level -
                       tensor.clip(self.mask.apply(average), -16, 16))
        noise_level = tensor.patternbroadcast(noise_level,
                                              (False, False, True, True))
        noise_level = copy_and_tag_noise(
            noise_level, self, LOG_SIGMA, 'log_sigma')
    else:
        average = input_
        noise_level = (self.prior_noise_level -
                       tensor.clip(self.mask.apply(input_), -16, 16))
        noise_level = copy_and_tag_noise(
            noise_level, self, LOG_SIGMA, 'log_sigma')

    # Allow incomplete batches by just taking the noise that is needed
    if self.tied_noise:
        if self.noise_batch_size is not None:
            noise = self.parameters[0][:input_.shape[0], :]
        else:
            noise = self.theano_rng.normal(input_.shape[0:2])
        # Fix: the original called `tensor.shape_padright(2)`, which pads the
        # constant 2 and throws the sampled noise away. Pad the 2-D noise
        # with two trailing axes instead.
        noise = tensor.shape_padright(noise, 2)
    else:
        if self.noise_batch_size is not None:
            noise = self.parameters[0][:input_.shape[0], :, :, :]
        else:
            noise = self.theano_rng.normal(input_.shape)

    kl = (
        self.prior_noise_level - noise_level
        + 0.5 * (
            tensor.exp(2 * noise_level)
            + (average - self.prior_mean) ** 2
        ) / tensor.exp(2 * self.prior_noise_level)
        - 0.5
    )
    application_call.add_auxiliary_variable(kl, roles=[NITS], name='nits')
    return input_ + self.noise_rate * tensor.exp(noise_level) * noise
def __init__(self, eta, cutpoints, *args, **kwargs):
    """Derive category probabilities from ``eta`` and ``cutpoints``.

    ``sigmoid(cutpoints - eta)`` gives cumulative probabilities. These are
    bracketed by a column of zeros and a column of ones and differenced
    along axis 1; the result is passed to the parent constructor as ``p``.
    """
    self.eta = tt.as_tensor_variable(eta)
    self.cutpoints = tt.as_tensor_variable(cutpoints)

    cumulative = sigmoid(
        tt.shape_padleft(self.cutpoints) - tt.shape_padright(self.eta))

    # A single column with the right shape, used as the template for the
    # leading zeros and trailing ones.
    column = tt.shape_padright(cumulative[:, 0])
    bracketed = tt.concatenate(
        [tt.zeros_like(column), cumulative, tt.ones_like(column)], axis=1)

    # Adjacent differences of the cumulative values.
    p = bracketed[:, 1:] - bracketed[:, :-1]

    super().__init__(p=p, *args, **kwargs)
def density_given_previous_a_and_x(x, w, V_alpha, b_alpha, V_mu, b_mu, V_sigma, b_sigma,activation_factor, p_prev, a_prev, x_prev,):
    """One step of the recurrence: update activations and accumulate log-density.

    The activation ``a`` is advanced by the outer product of ``x_prev`` and
    ``w``. The log of a Gaussian mixture evaluated at ``x`` is then added to
    ``p_prev``.

    Returns ``(p, a, x)``. Uses ``self`` from the enclosing scope.
    """
    a = a_prev + T.dot(T.shape_padright(x_prev, 1), T.shape_padleft(w, 1))
    h = self.nonlinearity(a * activation_factor)  # BxH

    # Mixture parameters predicted from the hidden state.
    Alpha = T.nnet.softmax(T.dot(h, V_alpha) + b_alpha)  # BxC
    Mu = T.dot(h, V_mu) + b_mu  # BxC
    Sigma = T.exp(T.dot(h, V_sigma) + b_sigma)  # BxC

    # Weighted per-component Gaussian log-density.
    standardised = (Mu - T.shape_padright(x, 1)) / Sigma
    arg = (-constantX(0.5) * T.sqr(standardised)
           - T.log(Sigma)
           - constantX(0.5 * numpy.log(2 * numpy.pi))
           + T.log(Alpha))

    p = p_prev + log_sum_exp(arg)
    return (p, a, x)
def density_and_gradients(x_i, x_im1, w_i, V_alpha, b_alpha, V_mu, b_mu, V_sigma, b_sigma, activation_factor, a_i, lp_accum, dP_da_ip1):
    """One backward step: Laplace-mixture log-density plus hand-written gradients.

    Returns, in order: the activation with this step's input contribution
    removed, the accumulated log-density, the accumulated gradient with
    respect to the activations, and the gradients for ``w``, the three
    bias/weight pairs (alpha, mu, sigma) and the activation factor.

    Uses ``self`` and ``non_linearity_name`` from the enclosing scope.
    """
    float_x = theano.config.floatX
    B = T.cast(x_i.shape[0], float_x)
    x_col = T.shape_padright(x_i, 1)

    pot = a_i * activation_factor
    h = self.nonlinearity(pot)  # BxH

    z_alpha = T.dot(h, V_alpha) + T.shape_padleft(b_alpha)
    z_mu = T.dot(h, V_mu) + T.shape_padleft(b_mu)
    z_sigma = T.dot(h, V_sigma) + T.shape_padleft(b_sigma)
    Alpha = T.nnet.softmax(z_alpha)  # BxC
    Mu = z_mu  # BxC
    Sigma = T.exp(z_sigma)  # BxC

    # Per-component Laplace log-density, weighted and floored at -100.
    Phi = -T.log(2 * Sigma) - T.abs_(Mu - x_col) / Sigma
    wPhi = T.maximum(Phi + T.log(Alpha), constantX(-100.0))
    lp_current = log_sum_exp(wPhi)
    # Component weights: exp of the weighted log-density minus its log-sum-exp.
    Pi = T.exp(wPhi - T.shape_padright(lp_current, 1))

    # Gradients with respect to the three pre-activations.
    dp_dz_alpha = Pi - Alpha  # BxC
    # NOTE(review): the collapsed source shows `dp_dz_mu = dp_dz_mu * Sigma`
    # after a '#'; it is treated as commented out here - confirm.
    dp_dz_mu = Pi * T.sgn(x_col - Mu) / Sigma
    dp_dz_sigma = Pi * (T.abs_(x_col - Mu) / Sigma - 1)

    # Batch-averaged parameter gradients.
    gb_alpha = dp_dz_alpha.mean(0, dtype=float_x)  # C
    gV_alpha = T.dot(h.T, dp_dz_alpha) / B  # HxC
    gb_mu = dp_dz_mu.mean(0, dtype=float_x)
    gV_mu = T.dot(h.T, dp_dz_mu) / B
    gb_sigma = dp_dz_sigma.mean(0, dtype=float_x)
    gV_sigma = T.dot(h.T, dp_dz_sigma) / B

    # Back through the hidden layer.
    dp_dh = (T.dot(dp_dz_alpha, V_alpha.T)
             + T.dot(dp_dz_mu, V_mu.T)
             + T.dot(dp_dz_sigma, V_sigma.T))  # BxH
    if non_linearity_name == "sigmoid":
        dp_dpot = dp_dh * h * (1 - h)
    elif non_linearity_name == "RLU":
        dp_dpot = dp_dh * (pot > 0)

    gfact = (dp_dpot * a_i).sum(1).mean(0, dtype=float_x)  # 1
    dP_da_i = dP_da_ip1 + dp_dpot * activation_factor  # BxH
    gW = T.dot(T.shape_padleft(x_im1, 1), dP_da_i).flatten() / B

    # Remove the contribution of the previous input from the activation.
    a_im1 = a_i - T.dot(T.shape_padright(x_im1, 1), T.shape_padleft(w_i, 1))
    return (a_im1,
            lp_accum + lp_current,
            dP_da_i, gW,
            gb_alpha, gV_alpha,
            gb_mu, gV_mu,
            gb_sigma, gV_sigma,
            gfact)
def density_and_gradients(x_i, x_im1, w_i, V_alpha, b_alpha, V_mu, b_mu, V_sigma, b_sigma, activation_factor, a_i, lp_accum, dP_da_ip1):
    """One backward step: Gaussian-mixture negative log-likelihood plus gradients.

    Returns, in order: the activation with this step's input contribution
    removed, the accumulated negative log-likelihood, the accumulated
    gradient with respect to the activations, and the gradients for ``w``,
    the three bias/weight pairs (alpha, mu, sigma) and the activation
    factor.

    Uses ``self`` from the enclosing scope.
    """
    B = T.cast(x_i.shape[0], floatX)
    x_col = T.shape_padright(x_i, 1)

    pot = a_i * activation_factor
    h = self.nonlinearity(pot)  # BxH

    z_alpha = T.dot(h, V_alpha) + T.shape_padleft(b_alpha)
    z_mu = T.dot(h, V_mu) + T.shape_padleft(b_mu)
    z_sigma = T.dot(h, V_sigma) + T.shape_padleft(b_sigma)
    Alpha = T.nnet.softmax(z_alpha)  # BxC
    Mu = z_mu  # BxC
    Sigma = T.exp(z_sigma)  # BxC

    # Per-component Gaussian log-density, weighted and floored at -100.
    Phi = (-constantX(0.5) * T.sqr((Mu - x_col) / Sigma)
           - T.log(Sigma)
           - constantX(0.5 * numpy.log(2 * numpy.pi)))
    wPhi = T.maximum(Phi + T.log(Alpha), constantX(-100.0))

    log_lik = log_sum_exp(wPhi)
    lp_current = -log_lik  # negative log likelihood

    # Fix: the responsibilities must be normalised with the log-likelihood
    # itself. The original subtracted the already negated `lp_current`,
    # giving exp(wPhi + logsumexp), which does not sum to one over the
    # components.
    # NOTE(review): the gradients below are those of the log-likelihood,
    # while `lp_accum` accumulates its negative - confirm the caller's sign
    # convention.
    Pi = T.exp(wPhi - T.shape_padright(log_lik, 1))

    dp_dz_alpha = Pi - Alpha  # BxC
    gb_alpha = dp_dz_alpha.mean(0, dtype=floatX)  # C
    gV_alpha = T.dot(h.T, dp_dz_alpha) / B  # HxC

    dp_dz_mu = -Pi * (Mu - x_col) / T.sqr(Sigma)
    dp_dz_mu = dp_dz_mu * Sigma  # Heuristic
    gb_mu = dp_dz_mu.mean(0, dtype=floatX)
    gV_mu = T.dot(h.T, dp_dz_mu) / B

    dp_dz_sigma = Pi * (T.sqr(x_col - Mu) / T.sqr(Sigma) - 1)
    gb_sigma = dp_dz_sigma.mean(0, dtype=floatX)
    gV_sigma = T.dot(h.T, dp_dz_sigma) / B

    # Back through the hidden layer.
    dp_dh = (T.dot(dp_dz_alpha, V_alpha.T)
             + T.dot(dp_dz_mu, V_mu.T)
             + T.dot(dp_dz_sigma, V_sigma.T))  # BxH
    if self.hidden_act == "sigmoid":
        dp_dpot = dp_dh * h * (1 - h)
    elif self.hidden_act == "ReLU":
        dp_dpot = dp_dh * (pot > 0)

    gfact = (dp_dpot * a_i).sum(1).mean(0, dtype=floatX)  # 1
    dP_da_i = dP_da_ip1 + dp_dpot * activation_factor  # BxH
    gW = T.dot(T.shape_padleft(x_im1, 1), dP_da_i).flatten() / B

    # Remove the contribution of the previous input from the activation.
    a_im1 = a_i - T.dot(T.shape_padright(x_im1, 1), T.shape_padleft(w_i, 1))
    return (a_im1,
            lp_accum + lp_current,
            dP_da_i, gW,
            gb_alpha, gV_alpha,
            gb_mu, gV_mu,
            gb_sigma, gV_sigma,
            gfact)
def create_prediction(self):
    """Build the symbolic multi-step prediction and return ``self.results``.

    The first step feeds three ``gfs`` entries and two ``pm25in`` entries
    to ``self.model.forward``. Later steps replace the ``pm25in`` entries
    with earlier predictions. After every step the last element of the
    layer status is padded on the right and appended to ``self.results``
    along axis 0.

    Side effects: overwrites ``self.layerstatus`` and ``self.results``.
    """
    gfs = self.gfs
    pm25in = self.pm25in

    # Initial forward pass.
    self.layerstatus = self.model.forward(
        T.concatenate([gfs[0], gfs[1], gfs[2], pm25in[0], pm25in[1]], axis=0))
    self.results = T.shape_padright(self.layerstatus[-1])

    if self.steps > 1:
        # Second step: one observed pm25in entry plus the first prediction.
        self.layerstatus = self.model.forward(
            T.concatenate(
                [gfs[1], gfs[2], gfs[3], pm25in[1], self.results[0]], axis=0),
            self.layerstatus)
        self.results = T.concatenate(
            [self.results, T.shape_padright(self.layerstatus[-1])], axis=0)

        # Remaining steps - 2 passes use only the model's own predictions.
        # `range` replaces the Python-2-only `xrange`; iteration is unchanged.
        for i in range(2, self.steps):
            self.layerstatus = self.model.forward(
                T.concatenate(
                    [gfs[i], gfs[i + 1], gfs[i + 2],
                     self.results[i - 2], self.results[i - 1]],
                    axis=0),
                self.layerstatus)
            # Original author's open question: "need T.shape_padright???"
            self.results = T.concatenate(
                [self.results, T.shape_padright(self.layerstatus[-1])],
                axis=0)

    return self.results
def decode_to_probs(self, activations, relative_position, low_bound, high_bound):
    """Turn raw activations into a probability vector per position.

    The last axis of ``activations`` is split into four groups (3, 4, 3 and
    the remainder), each passed through a softmax. The 4-way and 3-way
    groups are tiled and multiplied into 12 note probabilities, which are
    combined with the octave group. The third entry of the first group
    scales all note probabilities, and the first two entries are kept as
    they are.

    ``relative_position`` is accepted but not used here.

    Raises ``AssertionError`` unless ``low_bound`` is a multiple of 12 and
    the bound range spans exactly ``self.num_octaves`` octaves.
    """
    assert (low_bound%12==0) and (high_bound-low_bound == self.num_octaves*12), "Circle of thirds must evenly divide into octaves"

    flat = T.reshape(activations, (-1, self.RAW_ENCODING_WIDTH))

    # Independent softmaxes over the four groups of raw outputs.
    rsp = T.nnet.softmax(flat[:, :3])
    circle_a = T.nnet.softmax(flat[:, 3:7])
    circle_b = T.nnet.softmax(flat[:, 7:10])
    octave_choice = T.nnet.softmax(flat[:, 10:])

    # 12 note probabilities from the product of the tiled 4-way and 3-way groups.
    octave_notes = T.tile(circle_a, (1, 3)) * T.tile(circle_b, (1, 4))

    # Outer product of octave and note probabilities, flattened per row.
    per_octave = T.shape_padright(octave_choice) * T.shape_padaxis(octave_notes, 1)
    full_notes = T.reshape(per_octave, (-1, 12*self.num_octaves))

    full_probs = T.concatenate(
        [rsp[:, :2], T.shape_padright(rsp[:, 2])*full_notes], 1)

    # Restore the leading axes of the input.
    newshape = T.concatenate(
        [activations.shape[:-1], [2+high_bound-low_bound]], 0)
    return T.reshape(full_probs, newshape, ndim=activations.ndim)
def max_pool_3d(input, ds, ignore_border=False):
    """Max-pool the last three axes of a 5-D tensor ``[n, c, x, y, z]``.

    The last two axes are pooled with factors ``(ds[1], ds[2])``. The
    third-from-last axis is then moved to the back, pooled with ``ds[0]``,
    and the original axis order is restored.

    :raises NotImplementedError: if ``input`` is not 5-dimensional.
    """
    # Only [n, c, x, y, z] inputs are accepted.
    if input.ndim != 5:
        raise NotImplementedError(
            'max_pool_3d requires a input [n, c, x, y, z]')

    n_axes = input.ndim
    head_axes = list(range(n_axes - 3))

    # --- Pass 1: pool the trailing [y, z] frame ---------------------------
    # Batch size = product of the sizes of every non-frame axis.
    # See http://deeplearning.net/software/theano/library/tensor/basic.html#theano.tensor.shape_padright
    frame_batch = T.shape_padright(T.prod(input.shape[:-2]), 1)
    frame_shape_4d = T.cast(
        T.join(0, frame_batch, T.as_tensor([1, ]), input.shape[-2:]),
        'int32')
    frame_pooled = DownsampleFactorMax((ds[1], ds[2]), ignore_border)(
        T.reshape(input, frame_shape_4d, ndim=4))
    frame_out_shape = T.join(0, input.shape[:-2], frame_pooled.shape[-2:])
    frames = T.reshape(frame_pooled, frame_out_shape, ndim=n_axes)

    # --- Pass 2: rotate the remaining axis to the back and pool it --------
    rotated = frames.dimshuffle(
        head_axes + [n_axes - 2] + [n_axes - 1] + [n_axes - 3])
    time_batch = T.shape_padright(T.prod(rotated.shape[:-2]), 1)
    time_shape_4d = T.cast(
        T.join(0, time_batch, T.as_tensor([1, ]), rotated.shape[-2:]),
        'int32')
    time_pooled = DownsampleFactorMax((1, ds[0]), ignore_border)(
        T.reshape(rotated, time_shape_4d, ndim=4))
    time_out_shape = T.join(0, rotated.shape[:-2], time_pooled.shape[-2:])

    # Undo the rotation so the axes come back in their original order.
    restore = head_axes + [n_axes - 1] + [n_axes - 3] + [n_axes - 2]
    return T.reshape(time_pooled, time_out_shape,
                     ndim=n_axes).dimshuffle(restore)
def _create_maximum_activation_update(output, record, streamindex, topn):
    """
    Build the updates that keep the ``topn`` largest activations seen so far.

    ``record`` unpacks to ``(dims, maximums, indices, snapshot)``. The
    current batch's activations (and, for non-2-D outputs, the location of
    each unit's maximum) are concatenated with the stored ones, sorted in
    descending order, and truncated to ``topn``. Returns a list of
    ``(variable, new_value)`` pairs.
    """
    dims, maximums, indices, snapshot = record

    # Running case index for every (case, unit) entry of this batch.
    case_ids = tensor.shape_padright(
        tensor.arange(output.shape[0]) + streamindex)
    counters = tensor.tile(case_ids, (1, output.shape[1]))

    if len(dims) != 1:
        # output is a 4d tensor: flatten the trailing axes to 3d, then
        # reduce every unit to its maximum and that maximum's location.
        flat = output.flatten(ndim=3)
        flat_argmax = flat.argmax(axis=2)
        tmax = _apply_index(flat, flat_argmax, axis=2)
        # divmod separates the rolled-up location into an (x, y) pair.
        location = divmod(flat_argmax, dims[2])
        # (cases, units, 3) -> case_index, maxloc; matches `indices`.
        tind = tensor.stack((counters, ) + location, axis=2)
    else:
        # output is already (cases, units) -> activation.
        tmax = output
        tind = counters

    # Merge with what was stored before and keep the top entries.
    merged_max = tensor.concatenate((maximums, tmax), axis=0)
    merged_ind = tensor.concatenate((indices, tind), axis=0)
    order = (-merged_max).argsort(axis=0)[:topn]

    updates = [
        (maximums, _apply_perm(merged_max, order, axis=0)),
        (indices, _apply_perm(merged_ind, order, axis=0)),
    ]
    if snapshot:
        merged_snap = tensor.concatenate((snapshot, output), axis=0)
        updates.append((snapshot, _apply_perm(merged_snap, order, axis=0)))
    return updates
def predict_all(self, latitude, longitude, latitude_mask, **kwargs):
    """Run the recurrent block once and return output plus new states.

    ``latitude`` and ``longitude`` are standardised with the training GPS
    statistics, joined with the pre-context embeddings and fed to the
    recurrent block (called with ``iterate=False``). The new states are
    joined with the post-context embeddings, projected, and mapped back to
    GPS scale.

    Requires ``kwargs['states']`` and ``kwargs['cells']``.
    Returns ``(rto, next_states, next_cells)``.
    """
    gps_mean = data.train_gps_mean
    gps_std = data.train_gps_std

    # Standardise the coordinates and give each a trailing axis.
    lat_col = tensor.shape_padright((latitude - gps_mean[0]) / gps_std[0])
    lon_col = tensor.shape_padright((longitude - gps_mean[1]) / gps_std[1])

    pre_emb = tuple(self.pre_context_embedder.apply(**kwargs))
    rec_input = self.input_to_rec.apply(
        tensor.concatenate(pre_emb + (lat_col, lon_col), axis=1))
    rec_input = rec_input.repeat(4, axis=1)

    next_states, next_cells = self.rec.apply(
        rec_input, kwargs['states'], kwargs['cells'],
        mask=latitude_mask, iterate=False)

    post_emb = tuple(self.post_context_embedder.apply(**kwargs))
    rto = self.rec_to_output.apply(
        tensor.concatenate(post_emb + (next_states,), axis=1))

    # Undo the standardisation on the output.
    rto = (rto * gps_std) + gps_mean
    return (rto, next_states, next_cells)
def cost(self):
    """
    Build the symbolic cost for the configured ``self.loss``.

    Supported values are ``'ce'``, ``'priori'`` (handled identically to
    ``'ce'``), ``'entropy'`` and ``'sse'``.

    :rtype: (theano.Variable | None, dict[theano.Variable,theano.Variable] | None)
    :returns: cost, known_grads
    :raises AssertionError: if ``self.loss`` is not one of the values above.
    """
    known_grads = None
    if self.loss == 'ce' or self.loss == 'priori':
        if self.attrs.get("target", "").endswith("[sparse:coo]"):
            assert isinstance(self.y, tuple)
            assert len(self.y) == 3
            from NativeOp import crossentropy_softmax_and_gradient_z_sparse
            y_mask = self.network.j[self.attrs.get("target", "").replace("[sparse:coo]", "[sparse:coo:2:0]")]
            ce, grad_z = crossentropy_softmax_and_gradient_z_sparse(
                self.z, self.index, self.y[0], self.y[1], self.y[2], y_mask)
            return self.norm * T.sum(ce), {self.z: grad_z}
        if self.y_data_flat.type == T.ivector().type:
            # Use crossentropy_softmax_1hot to have a more stable and more optimized gradient calculation.
            # Theano fails to use it automatically; I guess our self.i indexing is too confusing.
            nll, pcx = T.nnet.crossentropy_softmax_1hot(x=self.y_m[self.i], y_idx=self.y_data_flat[self.i])
        else:
            nll = -T.dot(T.log(T.clip(self.p_y_given_x[self.i], 1.e-38, 1.e20)), self.y_data_flat[self.i].T)
        return self.norm * T.sum(nll), known_grads
    elif self.loss == 'entropy':
        h_e = T.exp(self.y_m)  # (TB)
        pcx = T.clip((h_e / T.sum(h_e, axis=1, keepdims=True)).reshape((self.index.shape[0],self.index.shape[1],self.attrs['n_out'])), 1.e-6, 1.e6)  # TBD
        ee = -T.sum(pcx[self.i] * T.log(pcx[self.i]))  # TB
        nll, _ = T.nnet.crossentropy_softmax_1hot(x=self.y_m, y_idx=self.y_data_flat)  # TB
        ce = nll.reshape(self.index.shape) * self.index  # TB
        y = self.y_data_flat.reshape(self.index.shape) * self.index  # TB
        f = T.any(T.gt(y,0), axis=0)  # B
        # Where f is set, use the cross-entropy; otherwise use the entropy term.
        return T.sum(f * T.sum(ce, axis=0) + (1-f) * T.sum(ee, axis=0)), known_grads
    # NOTE(review): a second `elif self.loss == 'priori'` branch used to sit
    # here. It could never run because 'priori' is already caught by the
    # first condition, so it has been removed. It computed
    # -sum(log(clip(p_y_given_x[i, y_data_flat[i]], 1e-38, 1e20))); restore
    # it ahead of the 'ce' test if that was the intended 'priori' cost.
    elif self.loss == 'sse':
        if self.y_data_flat.dtype.startswith('int'):
            # Integer targets: compare against a one-hot encoding.
            y_f = T.cast(T.reshape(self.y_data_flat, (self.y_data_flat.shape[0] * self.y_data_flat.shape[1]), ndim=1), 'int32')
            y_oh = T.eq(T.shape_padleft(T.arange(self.attrs['n_out']), y_f.ndim), T.shape_padright(y_f, 1))
            return T.mean(T.sqr(self.p_y_given_x[self.i] - y_oh[self.i])), known_grads
        else:
            return T.sum(T.sqr(self.y_m[self.i] - self.y_data_flat.reshape(self.y_m.shape)[self.i])), known_grads
    else:
        # Raised explicitly rather than via `assert False`, so the error is
        # not stripped under `python -O`.
        raise AssertionError("unknown loss: %s" % self.loss)
def max_pool_2d(input, ds, ignore_border=False, st=None, padding=(0, 0),
                mode='max'):
    """
    Pool an N-D tensor (N >= 2) over its two last dimensions.

    The input is downscaled by the given factor; each output value is computed
    from a patch of size (ds[0], ds[1]).

    :type input: N-D theano tensor of input images.
    :param input: input images. Pooling will be done over the 2 last
        dimensions.
    :type ds: tuple of length 2
    :param ds: factor by which to downscale (vertical ds, horizontal ds).
        (2,2) will halve the image in each dimension.
    :type ignore_border: bool
    :param ignore_border: When True, (5,5) input with ds=(2,2) will generate a
        (2,2) output. (3,3) otherwise.
    :type st: tuple of length 2
    :param st: stride size, which is the number of shifts over rows/cols to
        get the next pool region. If st is None, it is considered equal to ds
        (no overlap on pooling regions).
    :param padding: (pad_h, pad_w), pad zeros to extend beyond four borders of
        the images, pad_h is the size of the top and bottom margins, and pad_w
        is the size of the left and right margins.
    :type padding: tuple of two ints
    :param mode: 'max', 'average_inc_pad' or 'average_exc_pad'. Operation
        executed on each window. `max` always excludes the padding in the
        computation. `average` gives you the choice to include or exclude it.
    :type mode: string
    """
    if input.ndim < 2:
        raise NotImplementedError('max_pool_2d requires a dimension >= 2')

    pool_op = DownsampleFactorMax(ds, ignore_border, st=st, padding=padding,
                                  mode=mode)

    # 4-D input is already in the layout the op expects.
    if input.ndim == 4:
        return pool_op(input)

    # Collapse all leading dimensions into one batch axis and insert a
    # singleton channel axis: (batch_size, 1, height, width).
    lead_shape = input.shape[:-2]
    n_images = tensor.shape_padright(tensor.prod(lead_shape), 1)
    shape_4d = tensor.cast(
        tensor.join(0, n_images, tensor.as_tensor([1]), input.shape[-2:]),
        'int64')
    pooled = pool_op(tensor.reshape(input, shape_4d, ndim=4))

    # Put the original leading dimensions back in front of the pooled image.
    restored_shape = tensor.join(0, lead_shape, pooled.shape[-2:])
    return tensor.reshape(pooled, restored_shape, ndim=input.ndim)
def one_hot(self, t, r=None):
    """Return a floatX one-hot encoding of ``t`` along a new trailing axis.

    ``r`` is the number of classes; when omitted, ``max(t) + 1`` is used.
    """
    n_classes = T.max(t) + 1 if r is None else r
    # Class ids get t.ndim leading broadcast axes, t gets one trailing axis,
    # so the comparison broadcasts to t.shape + (n_classes,).
    class_ids = T.shape_padleft(T.arange(n_classes), t.ndim)
    hits = T.eq(class_ids, T.shape_padright(t, 1))
    return T.cast(hits, dtype=theano.config.floatX)
def log_likelihood(self, X, Y=None, n_samples=None):
    """Estimate log P(X) and per-layer statistics by importance sampling.

    Each row of ``X`` is replicated ``n_samples`` times, samples are drawn
    with ``self.sample_q`` and the per-layer log-probabilities are reshaped
    to (batch_size, n_samples) before being combined.

    :param X: input batch; the first dimension is the batch size.
    :param Y: not used by this method.
    :param n_samples: number of samples per datapoint; defaults to
        ``self.n_samples`` when None.
    :returns: tuple ``(log_px, w, log_p_all, log_q_all, KL, Hp, Hq)`` where
        ``log_px`` is logsumexp(log_p_all - log_q_all) - log(n_samples),
        ``w`` are the normalised sampling weights and ``KL``, ``Hp``, ``Hq``
        are lists with one entry per layer.
    """
    p_layers = self.p_layers
    q_layers = self.q_layers
    n_layers = len(p_layers)

    # Identity test: ``== None`` is the wrong comparison for an optional
    # argument that may also be a symbolic/array value.
    if n_samples is None:
        n_samples = self.n_samples

    batch_size = X.shape[0]

    # Get samples
    X = f_replicate_batch(X, n_samples)
    samples, log_p, log_q = self.sample_q(X, None)

    # Reshape and sum
    log_p_all = T.zeros((batch_size, n_samples))
    log_q_all = T.zeros((batch_size, n_samples))
    for l in range(n_layers):
        samples[l] = samples[l].reshape(
            (batch_size, n_samples, p_layers[l].n_X))
        log_q[l] = log_q[l].reshape((batch_size, n_samples))
        log_p[l] = log_p[l].reshape((batch_size, n_samples))

        log_p_all += log_p[l]   # aggregate all layers
        log_q_all += log_q[l]   # aggregate all layers

    # Approximate log P(X)
    log_px = f_logsumexp(log_p_all - log_q_all, axis=1) - T.log(n_samples)

    # Calculate sampling weights (normalised in log space)
    log_pq = (log_p_all - log_q_all - T.log(n_samples))
    w_norm = f_logsumexp(log_pq, axis=1)
    log_w = log_pq - T.shape_padright(w_norm)
    w = T.exp(log_w)

    # Calculate KL(P|Q), Hp, Hq
    KL = [None] * n_layers
    Hp = [None] * n_layers
    Hq = [None] * n_layers
    for l in range(n_layers):
        KL[l] = T.sum(w * (log_p[l] - log_q[l]), axis=1)
        Hp[l] = f_logsumexp(log_w + log_p[l], axis=1)
        Hq[l] = T.sum(w * log_q[l], axis=1)

    return log_px, w, log_p_all, log_q_all, KL, Hp, Hq
def queue_transform(feature_strengths, feature_vects, return_strengths=False):
    """
    Process features according to a "fragmented queue", where each timestep
    gets a size-1 window onto a feature queue.

    feature_strengths says how much to push onto the queue at each push
    timestep, feature_vects says what to push. Pop weights are tied to
    feature_strengths, and the output is a size-1 peek (without popping).

    Parameters:
        - feature_strengths: float32 tensor of shape (batch, push_timestep) in [0,1]
        - feature_vects: float32 tensor of shape (batch, push_timestep, feature_dim)
        - return_strengths: when true, also return the peek strengths

    Returns:
        - peek_vects: float32 tensor of shape (batch, timestep, feature_dim),
          or the pair (peek_strengths, peek_vects) if return_strengths is set
    """
    n_batch, n_time, n_feature = feature_vects.shape

    pushed_total = T.extra_ops.cumsum(feature_strengths, 1)
    pushed_before = pushed_total - feature_strengths

    # Work in (batch, timestep, push_timestep): subtracting what was pushed
    # before each timestep and clamping to [0, 1] leaves the cumulative sums
    # of only the features active in that timestep.
    relative = T.shape_padaxis(pushed_total, 1) - T.shape_padright(pushed_before)
    clipped = T.minimum(T.maximum(relative, 0), 1)

    # A first difference along the push axis recovers the peek strengths.
    leading_zeros = T.zeros((n_batch, n_time, 1))
    previous = T.concatenate([leading_zeros, clipped[:, :, :-1]], 2)
    peek_strengths = clipped - previous

    peeked = T.batched_dot(peek_strengths, feature_vects)
    if not return_strengths:
        return peeked
    return peek_strengths, peeked
def take_look(self, sequence, preprocessed_sequence=None, mask=None,
              **states):
    r"""Compute attention weights and produce glimpses.

    Parameters
    ----------
    sequence : :class:`~tensor.TensorVariable`
        The sequence, time is the 1-st dimension.
    preprocessed_sequence : :class:`~tensor.TensorVariable`
        The preprocessed sequence. If ``None``, is computed by calling
        :meth:`preprocess`.
    mask : :class:`~tensor.TensorVariable`
        A 0/1 mask specifying available data. 0 means that the
        corresponding sequence element is fake. If ``None``, no masking is
        applied.
    \*\*states
        The states of the agent.

    Returns
    -------
    glimpses : theano variable
        Linear combinations of sequence elements with the attention
        weights.
    weights : theano variable
        The attention weights. The first dimension is batch, the second
        is time.

    """
    # Optional symbolic arguments are tested for identity with None: the
    # truth value of a tensor variable is not a reliable "was it passed"
    # signal.
    if preprocessed_sequence is None:
        preprocessed_sequence = self.preprocess(sequence)
    transformed_states = self.state_transformers.apply(return_dict=True,
                                                       **states)
    # Broadcasting of transformed states should be done automatically
    match_vectors = sum(transformed_states.values(), preprocessed_sequence)
    # Drop the last axis of the energy computer's output.
    energies = self.energy_computer.apply(match_vectors).reshape(
        match_vectors.shape[:-1], ndim=match_vectors.ndim - 1)
    unormalized_weights = tensor.exp(energies)
    if mask is not None:
        unormalized_weights *= mask
    # Normalise over the first (time) axis.
    weights = unormalized_weights / unormalized_weights.sum(axis=0)
    glimpses = (tensor.shape_padright(weights) * sequence).sum(axis=0)
    return glimpses, weights.dimshuffle(1, 0)
def max_pool_2d(input, ds, ignore_border=False, st=None):
    """
    Max-pool an N-D tensor (N >= 2) over its two last dimensions.

    The input is downscaled by the given factor, keeping only the maximum
    value of patches of size (ds[0], ds[1]).

    :type input: N-D theano tensor of input images.
    :param input: input images. Max pooling will be done over the 2 last
        dimensions.
    :type ds: tuple of length 2
    :param ds: factor by which to downscale (vertical ds, horizontal ds).
        (2,2) will halve the image in each dimension.
    :type ignore_border: bool
    :param ignore_border: When True, (5,5) input with ds=(2,2) will generate a
        (2,2) output. (3,3) otherwise.
    :type st: tuple of length 2
    :param st: stride size, which is the number of shifts over rows/cols to
        get the next pool region. If st is None, it is considered equal to ds
        (no overlap on pooling regions).
    """
    if input.ndim < 2:
        raise NotImplementedError('max_pool_2d requires a dimension >= 2')

    # Fold every leading dimension into a single batch axis and add a
    # singleton channel axis: (batch_size, 1, height, width).
    lead_shape = input.shape[:-2]
    n_images = tensor.shape_padright(tensor.prod(lead_shape), 1)
    shape_4d = tensor.cast(
        tensor.join(0, n_images, tensor.as_tensor([1]), input.shape[-2:]),
        'int64')

    # Downsample the mini-batch of images.
    pool_op = DownsampleFactorMax(ds, ignore_border, st=st)
    pooled = pool_op(tensor.reshape(input, shape_4d, ndim=4))

    # Restore the original leading dimensions.
    restored_shape = tensor.join(0, lead_shape, pooled.shape[-2:])
    return tensor.reshape(pooled, restored_shape, ndim=input.ndim)
def get_means_and_covs(self, X, X_embedded):
    """
    Get the mean and the covariance for the distribution for the code z

    :param X: (N x max(L)) matrix representing the text
    :param X_embedded: (N x max(L) x E) tensor representing the embedded text

    :return: variational mean and covariance for the latents given a sentence
    """
    # 0 where the entry of X is strictly negative, 1 everywhere else
    # (used to filter out words).
    keep = T.switch(T.lt(X, 0), 0, 1)               # N x max(L)

    # A trailing singleton axis lets the mask broadcast over the embedding
    # dimension.
    X_embedded *= T.shape_padright(keep)            # mask: N x max(L) x 1

    means = get_output(self.mean_nn, X_embedded)    # N x Z
    covs = get_output(self.cov_nn, X_embedded)      # N x Z

    return means, covs
def build_NADE(self, v, W, V, b, c):
    """Build the symbolic NADE graph for input ``v``.

    Returns ``(s, y, cost)``: the pre-sigmoid outputs ``s``, ``y =
    sigmoid(s)``, and the binary cross-entropy between ``v`` and ``y`` summed
    over all entries and divided by ``v.shape[0]``.
    """
    def _accumulate(V_i, a_i, partial_im1):
        # First output: running activation including the current term.
        # Second output: computed from the activation *before* adding a_i.
        return (a_i + partial_im1,
                T.dot(V_i, T.nnet.sigmoid(partial_im1.T)))

    # Per-input contributions; axes 0 and 1 are swapped so that scan iterates
    # over what was the second axis.
    contributions = (T.shape_padright(v) * T.shape_padleft(W)).dimshuffle(1, 0, 2)

    if c.ndim == 1:
        # Repeat a vector bias once per row of v.
        c_init = T.dot(T.ones((v.shape[0], 1)), T.shape_padleft(c))
    else:
        c_init = c

    (activations, s), updates = theano.scan(
        _accumulate,
        sequences=[V.T, contributions],
        outputs_info=[c_init, None])

    s = s.T + b
    y = T.nnet.sigmoid(s)
    cost = (-v * T.log(y) - (1 - v) * T.log(1 - y)).sum() / v.shape[0]
    return s, y, cost
def my_pool_2d(input, ds, ignore_border=None, st=None, padding=(0, 0),
               mode='max'):
    """
    This function is a patch to the maxpool op of Theano:
    contrarily to current implementation of maxpool, the gradient is
    backpropagated to only one input of a given patch if several inputs have
    the same value. This is consistent with the CuDNN implementation (and
    therefore the op is replaced by the CuDNN version when possible).
    """
    if input.ndim < 2:
        raise NotImplementedError('pool_2d requires a dimension >= 2')

    # A caller may only pass a true value for ignore_border; the op is always
    # built with ignore_border=True.
    if ignore_border is not None:
        assert ignore_border
    ignore_border = True

    pool_op = MyPool(ds, ignore_border, st=st, padding=padding, mode=mode)

    # 4-D input goes straight through the op.
    if input.ndim == 4:
        return pool_op(input)

    # Otherwise fold the leading dimensions into one batch axis and add a
    # singleton channel axis: (batch_size, 1, height, width).
    lead_shape = input.shape[:-2]
    n_images = tensor.shape_padright(tensor.prod(lead_shape), 1)
    shape_4d = tensor.cast(
        tensor.join(0, n_images, tensor.as_tensor([1]), input.shape[-2:]),
        'int64')
    pooled = pool_op(tensor.reshape(input, shape_4d, ndim=4))

    # Restore the original leading dimensions.
    restored_shape = tensor.join(0, lead_shape, pooled.shape[-2:])
    return tensor.reshape(pooled, restored_shape, ndim=input.ndim)
def __init__(self, rbm, units, dimensions, shared_dimensions, b, name=None):
    """Register bias ``b`` for ``units``, shared over trailing dimensions.

    ``dimensions`` is stored as ``self.ud`` and ``shared_dimensions`` as
    ``self.sd``; ``self.nd`` is their difference.
    """
    super(SharedBiasParameters, self).__init__(rbm, [units], name=name)

    self.var = b
    self.variables = [self.var]
    self.u = units
    self.ud = dimensions
    self.sd = shared_dimensions
    self.nd = self.ud - self.sd

    def _bias_term(vmap):
        # Pad the bias on the right with one broadcast axis per shared
        # dimension.
        return T.shape_padright(self.var, self.sd)

    def _bias_gradient(vmap):
        # Average the unit values over the shared axes.
        return T.mean(vmap[self.u], axis=self._shared_axes(vmap))

    self.terms[self.u] = _bias_term
    self.energy_gradients[self.var] = _bias_gradient
def train(self, data):
    """Train the map on ``data`` for ``self.iterations`` steps.

    Each step picks one row of ``data`` uniformly at random, runs one
    compiled update of ``self.W`` with it, stores the returned value in
    ``self.last_W`` and advances the iteration counter. The iteration index
    is printed every step and ``self.print_image`` is called every
    ``self.print_fr`` steps.

    :param data: sequence of input vectors; converted to a floatX array.
    """
    data = np.asarray(data, dtype=theano.config.floatX)
    val = T.vector('val')

    # Lattice node selected by self.min_dist for the presented vector.
    min_idx = self.min_dist(val)

    # Neighbourhood radius and learning rate both decay exponentially with
    # the iteration counter.
    self.nhood_radius = self.radius * T.exp(-self.it / self.time_const)
    dists = self.in_neighbourhood(self.lattice[min_idx])
    in_nhood = dists < self.nhood_radius
    lr = self.start_lr * T.exp(-self.it / self.iterations)

    # Only nodes inside the neighbourhood are moved towards the input.
    updates = [(self.W,
                self.W + self.compute_influence(dists) *
                T.shape_padright(in_nhood, 1) * lr * (val - self.W))]

    epoch = theano.function(inputs=[val], outputs=self.ret_w(),
                            updates=updates)
    update_iteration = theano.function(inputs=[], outputs=self.it,
                                       updates={self.it: self.it + 1})

    self.last_W = None
    for i in range(self.iterations):
        # randint's upper bound is exclusive, so this draws from
        # [0, len(data) - 1] exactly like the deprecated
        # random_integers(0, len(data) - 1) did.
        index = np.random.randint(0, len(data))
        self.last_W = epoch(data[index])

        # NOTE(review): this rebinds self.it to the value returned by the
        # compiled function; the compiled functions above keep using the
        # variable they were built with. Confirm this is intended.
        self.it = update_iteration()
        print(i)

        # print an image every self.print_fr iterations
        if (i + 1) % self.print_fr == 0:
            self.print_image(i)
def build_aligner(self):
    """Compile ``self.align``, which returns the decoder's alignments.

    The compiled function takes, in order: query_tokens, tgt_action_seq,
    tgt_action_seq_type, tgt_node_seq, tgt_par_rule_seq, tgt_par_t_seq.
    """
    # Symbolic inputs.
    tgt_action_seq = ndim_itensor(3, 'tgt_action_seq')
    tgt_action_seq_type = ndim_itensor(3, 'tgt_action_seq_type')
    tgt_node_seq = ndim_itensor(2, 'tgt_node_seq')
    tgt_par_rule_seq = ndim_itensor(2, 'tgt_par_rule_seq')
    tgt_par_t_seq = ndim_itensor(2, 'tgt_par_t_seq')

    tgt_node_embed = self.node_embedding[tgt_node_seq]

    query_tokens = ndim_itensor(2, 'query_tokens')
    query_token_embed, query_token_embed_mask = self.query_embedding(
        query_tokens, mask_zero=True)

    # Not used further down in this method.
    batch_size = tgt_action_seq.shape[0]
    max_example_action_num = tgt_action_seq.shape[1]

    # Column 0 of an action holds a rule id, column 1 a vocabulary id; the
    # rule embedding is used wherever the rule id is positive.
    rule_ids = tgt_action_seq[:, :, 0]
    vocab_ids = tgt_action_seq[:, :, 1]
    tgt_action_seq_embed = T.switch(T.shape_padright(rule_ids > 0),
                                    self.rule_embedding_W[rule_ids],
                                    self.vocab_embedding_W[vocab_ids])
    tgt_action_seq_embed_tm1 = tensor_right_shift(tgt_action_seq_embed)

    # Negative parent rule ids get an all-zero embedding.
    tgt_par_rule_embed = T.switch(tgt_par_rule_seq[:, :, None] < 0,
                                  T.alloc(0., 1, config.rule_embed_dim),
                                  self.rule_embedding_W[tgt_par_rule_seq])

    # Features switched off in the config are zeroed out, not removed, so the
    # decoder input keeps its width.
    if not config.frontier_node_type_feed:
        tgt_node_embed *= 0.
    if not config.parent_action_feed:
        tgt_par_rule_embed *= 0.

    decoder_input = T.concatenate(
        [tgt_action_seq_embed_tm1, tgt_node_embed, tgt_par_rule_embed],
        axis=-1)

    query_embed = self.query_encoder_lstm(query_token_embed,
                                          mask=query_token_embed_mask,
                                          dropout=0, srng=self.srng)

    tgt_action_seq_mask = T.any(tgt_action_seq_type, axis=-1)

    alignments = self.decoder_lstm.align(decoder_input,
                                         context=query_embed,
                                         context_mask=query_token_embed_mask,
                                         mask=tgt_action_seq_mask,
                                         parent_t_seq=tgt_par_t_seq,
                                         srng=self.srng)

    alignment_inputs = [query_tokens, tgt_action_seq, tgt_action_seq_type,
                        tgt_node_seq, tgt_par_rule_seq, tgt_par_t_seq]
    self.align = theano.function(alignment_inputs, [alignments])
def __init__(self, w, comp_dists, mixture_axis=-1, *args, **kwargs):
    """Set up a mixture whose components all come from ``comp_dists``.

    :param w: mixture weights; converted to a tensor variable.
    :param comp_dists: a ``Distribution`` instance holding all components.
    :param mixture_axis: axis of ``comp_dists.shape`` that indexes the
        components. Negative values count from the end.
    :raises TypeError: if ``comp_dists`` is not a ``Distribution``.
    :raises ValueError: if a negative ``mixture_axis`` is out of bounds.
    """
    self.w = tt.as_tensor_variable(w)
    if not isinstance(comp_dists, Distribution):
        raise TypeError(
            "The MixtureSameFamily distribution only accepts Distribution "
            f"instances as its components. Got {type(comp_dists)} instead."
        )
    self.comp_dists = comp_dists

    if mixture_axis < 0:
        # Keep the value the caller passed so the error reports it; the
        # normalised index would be misleading.
        requested_axis = mixture_axis
        mixture_axis = len(comp_dists.shape) + mixture_axis
        if mixture_axis < 0:
            raise ValueError(
                "`mixture_axis` is supposed to be in shape of components' distribution. "
                f"Got {requested_axis} axis instead out of the bounds."
            )
    comp_shape = to_tuple(comp_dists.shape)
    # The mixture's own shape is the component shape without the mixture axis.
    self.shape = comp_shape[:mixture_axis] + comp_shape[mixture_axis + 1:]
    self.mixture_axis = mixture_axis
    kwargs.setdefault("dtype", self.comp_dists.dtype)

    # Compute the mode so we don't always have to pass a testval.
    # Copy the list: it is appended to below and must not leak back into the
    # caller's object.
    defaults = list(kwargs.pop("defaults", []))
    event_shape = self.comp_dists.shape[mixture_axis + 1:]
    # Use the tensor version of the weights so that plain sequences (which
    # have no ``ndim``) are accepted too.
    _w = tt.shape_padleft(
        tt.shape_padright(self.w, len(event_shape)),
        len(self.comp_dists.shape) - self.w.ndim - len(event_shape),
    )
    mode = take_along_axis(
        self.comp_dists.mode,
        tt.argmax(_w, keepdims=True),
        axis=mixture_axis,
    )
    self.mode = mode[(..., 0) + (slice(None), ) * len(event_shape)]

    if not all_discrete(comp_dists):
        mean = tt.as_tensor_variable(self.comp_dists.mean)
        self.mean = (_w * mean).sum(axis=mixture_axis)
        if "mean" not in defaults:
            defaults.append("mean")
    defaults.append("mode")

    super().__init__(defaults=defaults, *args, **kwargs)
def AttnDecStep(name, n_input, input_dim, hidden_dim, ctx_dim, ctx, x_t, prev_state, mode='train',weightnorm=True):
    """Run one attention-decoder step on top of a GRU cell.

    In 'open-loop' mode ``x_t`` is first embedded and the step returns
    ``(idxs, state_t)``, where ``idxs`` is the int32 argmax of the output
    softmax. In any other mode it returns ``(state_t, c_t)``, with ``c_t``
    the attention-weighted sum of ``ctx``. ``weightnorm`` is not used here.
    """
    open_loop = mode == 'open-loop'
    h_tm1 = prev_state

    if open_loop:
        x_t = lib.ops.Embedding(
            'NMT.Embedding_Phons',
            n_input,
            input_dim,
            x_t
        )

    # Score every context position against the previous hidden state with a
    # two-layer MLP.
    h_per_position = T.tile(h_tm1[:, None, :], [1, ctx.shape[1], 1])
    attn_hidden = T.nnet.relu(lib.ops.Linear(
        'NMT.Attention.MLP1',
        T.concatenate([h_per_position, ctx], -1),
        hidden_dim+ctx_dim,
        hidden_dim
    ))
    attn_scores = lib.ops.Linear(
        'NMT.Attention.MLP2',
        attn_hidden,
        hidden_dim,
        1
    )[:, :, 0]
    e_vec = T.nnet.softmax(attn_scores)  # (B, seq_len)

    # Context vector: attention-weighted sum over the sequence axis.
    c_t = T.sum(T.shape_padright(e_vec)*ctx, axis=1)

    input_to_rnn = T.concatenate([x_t, c_t], -1)
    # The step mask is all ones.
    mask_t = T.ones((x_t.shape[0],)).astype(theano.config.floatX)
    state_t = GRUStep(name, input_dim+ctx_dim, hidden_dim, mask_t, input_to_rnn, h_tm1)

    if not open_loop:
        return state_t, c_t

    # Despite the name, these are softmax outputs, not raw logits.
    logits = T.nnet.softmax(lib.ops.Linear(
        name+'.Output.MLP.1',
        T.concatenate([x_t, state_t[:, :hidden_dim], c_t], -1),
        input_dim+hidden_dim+ctx_dim,
        n_input
    ))
    idxs = T.argmax(logits, axis=-1).astype('int32')
    return idxs, state_t
def __init__(self, n, p, *args, **kwargs):
    """Store ``n`` and the normalised ``p`` and derive ``mean`` and ``mode``.

    ``p`` is rescaled to sum to one along its last axis. For a 2-D
    distribution shape, ``n`` and ``p`` are padded so that they broadcast
    against each other.
    """
    super(Multinomial, self).__init__(*args, **kwargs)

    normalised_p = p / tt.sum(p, axis=-1, keepdims=True)

    if len(self.shape) != 2:
        self.n = n
        self.p = normalised_p
    else:
        try:
            assert n.shape == (self.shape[0], )
        except AttributeError:
            # n has no ``shape``: it is a scalar Python int or float, so
            # expand it to one count per row.
            n *= tt.ones(self.shape[0])
        self.n = tt.shape_padright(n)
        if normalised_p.ndim == 2:
            self.p = normalised_p
        else:
            self.p = tt.shape_padleft(normalised_p)

    self.mean = self.n * self.p
    self.mode = tt.cast(tt.round(self.mean), 'int32')
def __init__(
    self, points, values, check_sorted=True, bounds_error=False, nout=-1
):
    """Wrap grid ``points`` and ``values`` in shared variables and build the op.

    ``points`` holds one array per grid dimension. If ``values`` has exactly
    as many dimensions as the grid, a trailing axis is added to it.
    """
    n_grid_dims = len(points)
    self.ndim = n_grid_dims
    self.nout = int(nout)

    self.points = [theano.shared(axis_points) for axis_points in points]

    shared_values = theano.shared(values)
    if shared_values.ndim == n_grid_dims:
        # No output axis present: add one on the right.
        shared_values = tt.shape_padright(shared_values)
    self.values = shared_values

    self.check_sorted = bool(check_sorted)
    self.bounds_error = bool(bounds_error)

    self.interp_op = RegularGridOp(
        self.ndim,
        nout=self.nout,
        check_sorted=self.check_sorted,
        bounds_error=self.bounds_error,
    )
def grad_wrt_list(cost, wrt_list):
    """
    Compute gradient of cost wrt the variables in wrt_list.

    Returns ``(g_vec, g_list)``: ``g_list`` holds one 1-D gradient per
    variable and ``g_vec`` is their concatenation. For an empty list the
    result is ``(constant 0., [])``.
    """
    if wrt_list == []:
        return T.constant(0.), []

    def _as_vector(g):
        # Scalars are padded to length-1 vectors, higher-rank gradients are
        # flattened, vectors pass through untouched.
        if g.ndim < 1:
            return T.shape_padright(g, n_ones=1)
        if g.ndim > 1:
            return T.flatten(g)
        return g

    g_list = [_as_vector(g) for g in T.grad(cost, wrt_list)]
    return T.concatenate(g_list), g_list
def Arodz(X, Y):
    """Fit a Bayesian logistic regression and return the MAP estimates.

    X is the data matrix [samples x features] and Y the vector of class
    labels. Returns the pair (w, b) taken from the MAP estimate.
    """
    numberOfFeatures = len(X[0])
    # Labels as a column so they line up with the [samples x 1] probabilities.
    Y = np.reshape(Y, (len(Y), 1))

    with pm.Model() as basic_model:
        # Gaussian priors for w and b, centered at 0, with very large spread.
        mu_prior_cov = 100*np.eye(numberOfFeatures)
        mu_prior_mu = np.zeros((numberOfFeatures,))
        w = pm.MvNormal('estimated_w', mu=mu_prior_mu, cov=mu_prior_cov,
                        shape=numberOfFeatures)
        b = pm.Normal('estimated_b', 0, 100)

        # u = w^T x + b, with w as a column vector so that X . w works for
        # the whole data matrix at once.
        ww = pm.Deterministic('my_w_as_mx', T.shape_padright(w, 1))
        u = pm.Deterministic('my_u', T.dot(X, ww) + b)

        # P(+1|x) = logistic(u)
        prob = pm.Deterministic('my_prob', 1.0 / (1.0 + T.exp(-1.0*u)))

        # Observed classes: +1 with probability prob, 0 otherwise.
        pm.Bernoulli('Y_obs', p=prob, observed=Y)

    # Maximum a posteriori estimate (not plain maximum likelihood, since the
    # model has priors); the result maps parameter names to estimated values.
    map_estimate1 = pm.find_MAP(model=basic_model)
    return map_estimate1['estimated_w'], map_estimate1['estimated_b']
def __init__(self, loss, n_units, transfer_funcs):
    """Build the parameter set and compile the Theano functions of the MLP.

    :param loss: loss identifier passed to ``self.fit_loss``; when None, no
        loss functions are compiled.
    :param n_units: number of units per layer, input layer first.
    :param transfer_funcs: transfer function identifier per layer.

    Sets ``self.ps``, ``self.f_predict`` and, if a loss is given,
    ``self.f_loss`` and ``self.f_loss_grad``.
    """
    n_layers = len(n_units)

    # %-formatting keeps the output identical on Python 2 and 3 (the double
    # space reproduces the separator of the former print statements).
    print("===== MLP =========")
    print("Number of layers:  %s" % (n_layers,))
    print("Loss:  %s" % (loss,))
    print("Number of units:  %s" % (n_units,))
    print("Transfer function:  %s" % (transfer_funcs,))
    print("===================")

    # create ParameterSet: name -> shape
    param_shapes = {}
    for lyr in range(n_layers):
        if lyr != 0:
            # The input layer has no incoming weights or bias.
            param_shapes["weights_%d_to_%d" % (lyr - 1, lyr)] = (n_units[lyr], n_units[lyr - 1])
            param_shapes["bias_%d" % lyr] = (n_units[lyr],)
        param_shapes.update(self.transfer_func_parameter_shape(lyr, transfer_funcs[lyr], n_units[lyr]))
    self.ps = ParameterSet(**param_shapes)

    # create graph
    v_input = T.fmatrix('v_input')  # v_input[unit, smpl]
    unit_val = [None] * n_layers
    for lyr in range(n_layers):
        if lyr == 0:
            unit_act = v_input
        else:
            # The bias gets a trailing axis so it broadcasts over samples.
            unit_act = T.dot(self.weights(lyr - 1, lyr), unit_val[lyr - 1]) + T.shape_padright(self.bias(lyr))
        unit_val[lyr] = self.make_transfer_func(lyr, transfer_funcs[lyr])(unit_act)
    output = unit_val[-1]
    self.f_predict = function([self.ps.flat, v_input], output, name='f_predict')

    # calculate loss
    if loss is not None:
        v_target = T.fmatrix('v_target')  # v_target[unit, smpl]
        fit_smpl_loss = self.fit_loss(loss, transfer_funcs[-1], v_target, output)
        mean_loss = T.mean(fit_smpl_loss)
        dloss = T.grad(mean_loss, self.ps.flat)
        self.f_loss = function([self.ps.flat, v_input, v_target], mean_loss, name='f_loss')
        self.f_loss_grad = function([self.ps.flat, v_input, v_target], dloss, name='f_loss_grad')
def pool_1d_Lasagne(x, axis=1, mode='max'):
    """
    Pool ``x`` with window 2 and stride 2 through a 2-D pooling call.

    Lasagne requires x is 3D, and pooling is done on the last dimension.
    With ``axis == 1`` the two last axes are swapped before and after
    pooling; for any other value no swap is done.

    :param x: 3-D input
    :param axis: axis selector, see above
    :param mode: pooling mode forwarded to ``pool_2d``
    :return: pooled 3-D tensor
    """
    swap_axes = axis == 1

    # Add a trailing singleton axis so the 2-D pooling op can be used.
    input_4d = tensor.shape_padright(x, 1)
    if swap_axes:
        input_4d = input_4d.dimshuffle((0, 2, 1, 3))

    pooled = pool_2d(
        input_4d,
        ws=(2, 1),
        stride=(2, 1),
        ignore_border=True,
        pad=(0, 0),
        mode=mode,
    )

    if swap_axes:
        # [DV] add support for 'axis' para
        pooled = pooled.dimshuffle((0, 2, 1, 3))

    # Drop the singleton axis again.
    return pooled[:, :, :, 0]
def compute_weighted_averages(self, weights, attended):
    """Compute weighted averages of the attended sequence vectors.

    Parameters
    ----------
    weights : :class:`~theano.Variable`
        The weights. The shape must be equal to the attended shape
        without the last dimension.
    attended : :class:`~theano.Variable`
        The attended. The index in the sequence must be the first
        dimension.

    Returns
    -------
    weighted_averages : :class:`~theano.Variable`
        The weighted averages of the attended elements. The shape
        is equal to the attended shape with the first dimension
        dropped.

    """
    # A trailing broadcast axis lines the weights up with the feature axis.
    broadcast_weights = tensor.shape_padright(weights)
    weighted = broadcast_weights * attended
    return weighted.sum(axis=0)
def concatenate_basic(self, query_token_embed, query_tokens_phrase,
                      query_tokens_pos, query_tokens_canon_id):
    """Append per-token features to the token embedding along axis 2.

    The phrase and POS features are always appended; the canonical id is
    appended only when ``config.include_cid`` equals True. Each feature gets
    a trailing axis first so it can be concatenated with the embedding.
    """
    # Essentially expands the embedding to incorporate the new data.
    parts = [
        query_token_embed,
        T.shape_padright(query_tokens_phrase),
        T.shape_padright(query_tokens_pos),
    ]
    if config.include_cid == True:
        parts.append(T.shape_padright(query_tokens_canon_id))
    return T.concatenate(parts, axis=2)
def conv2d_same(input, filters, input_shape=(None, None, None, None),
                filter_shape=(None, None, None, None), padding=None):
    """2-D convolution whose output keeps the spatial size of ``input``.

    The input is embedded in a larger tensor and convolved in 'valid' mode.
    The border is zero, or filled from ``padding`` (a 1-D tensor) when given.
    Filter height and width must be odd. Pure 1x1 cases are delegated to
    ``tensor4dot``.
    """
    assert input.ndim == 4 and filters.ndim == 4
    assert (4 == len(input_shape)) and (4 == len(filter_shape))
    assert (1 == filter_shape[2] % 2) and (1 == filter_shape[3] % 2)

    filter_is_1x1 = tuple(filter_shape[2:4]) == (1, 1)
    if (tuple(input_shape[2:4]) == (1, 1) and filter_is_1x1) or (
            filter_is_1x1 and theano.config.device == "cpu"):
        return tensor4dot(input, filters)

    # Location of the original image inside the padded tensor.
    new_row_begin = filters.shape[2] / 2
    new_row_end = input.shape[2] + filters.shape[2] / 2
    new_col_begin = filters.shape[3] / 2
    new_col_end = input.shape[3] + filters.shape[3] / 2

    padded_dims = (input.shape[0], input.shape[1],
                   input.shape[2] + filters.shape[2] - 1,
                   input.shape[3] + filters.shape[3] - 1)

    if padding is None:
        padded_input = TT.zeros(padded_dims).astype(theano.config.floatX)
    else:
        assert 1 == padding.ndim
        # Ones on the border and zeros in the interior, scaled by the padding
        # values broadcast over the batch and both spatial axes.
        padded_input = TT.ones(padded_dims).astype(theano.config.floatX)
        padded_input = TT.set_subtensor(
            padded_input[:, :, new_row_begin:new_row_end, new_col_begin:new_col_end],
            numpy_floatX(0))
        padding = TT.shape_padleft(TT.shape_padright(padding, 2), 1)
        padded_input = padding * padded_input

    # Drop the actual input into the (zero) interior.
    padded_input = TT.inc_subtensor(
        padded_input[:, :, new_row_begin:new_row_end, new_col_begin:new_col_end],
        input)

    # Static shape hints for the padded input, where they are known.
    new_input_shape = [input_shape[0], input_shape[1], None, None]
    for axis in (2, 3):
        if input_shape[axis] is not None and filter_shape[axis] is not None:
            new_input_shape[axis] = input_shape[axis] + filter_shape[axis] - 1

    ret = TT.nnet.conv2d(input=padded_input, filters=filters,
                         border_mode='valid',
                         image_shape=tuple(new_input_shape),
                         filter_shape=filter_shape)
    return ret
def theano_code(y, x, xs, ys, zs, xo, yo, ro):
    """Return the illuminated intensity at the point (x, y), or zero.

    The result is zero when the point lies within radius ``ro`` of
    (xo, yo) or outside the unit disk. ``self`` comes from the enclosing
    scope.
    """
    def _zero():
        return tt.as_tensor_variable(0.0).astype(tt.config.floatX)

    # Get the z coord
    z = tt.sqrt(1 - x**2 - y**2)

    # Compute the intensity: dot the polynomial basis into the coefficients.
    pT = self.map_ref.ops.pT(x, y, z)
    intensity = tt.shape_padright(tt.dot(pT, self._A1y))

    # Weight the intensity by the illumination.
    xyz = tt.concatenate(
        tuple(tt.reshape(coord, [1, -1]) for coord in (x, y, z))
    )
    I = self.map_ref.ops.compute_illumination_point_source(
        xyz,
        xs,
        ys,
        zs,
        tt.as_tensor_variable(0.0),
        tt.as_tensor_variable(np.array(False)),
    )
    # NaN intensities are passed through unweighted.
    intensity = tt.switch(tt.isnan(intensity), intensity, intensity * I)[0, 0]

    # Check if the point is visible
    behind_occultor = ((x - xo)**2 + (y - yo)**2 < ro**2)[0]
    off_disk = (x**2 + y**2 > 1)[0]
    result = ifelse(
        behind_occultor,
        _zero(),
        ifelse(off_disk, _zero(), intensity),
    )
    return result
def step(self, ipt, state, state_strength, dropout_masks=None):
    """
    Perform a single step of the network

    Params:
        ipt: The current input. Should be an int tensor of shape (n_batch, self.input_width)
        state: The previous state. Should be a float tensor of shape (n_batch, self.output_width)
        state_strength: Strength of the previous state. Should be a float tensor of shape
            (n_batch)
        dropout_masks: Masks from get_dropout_masks

    Returns: The next output state, and the next output strength
    """
    if dropout_masks is not None:
        ipt_masks, state_masks = dropout_masks
        ipt = ipt * ipt_masks
        state = state * state_masks

    # The gates see the previous state scaled by its strength.
    obs_state = state * T.shape_padright(state_strength)
    gate_input = T.concatenate([ipt, obs_state], 1)

    reset = do_layer(T.nnet.sigmoid, gate_input,
                     self._reset_W, self._reset_b)
    update = do_layer(T.nnet.sigmoid, gate_input,
                      self._update_W, self._update_b)
    # The last column of the update gate drives the strength, the remaining
    # columns drive the state.
    update_state, update_strength = update[:, :-1], update[:, -1]

    candidate_input = T.concatenate([ipt, (reset * obs_state)], 1)
    candidate_act = do_layer(T.tanh, candidate_input,
                             self._activation_W, self._activation_b)
    candidate_strength = do_layer(
        T.nnet.sigmoid, candidate_input,
        self._strength_W, self._strength_b).reshape(state_strength.shape)

    # Interpolate between the previous values and the candidates.
    newstate = update_state * state + (1 - update_state) * candidate_act
    newstrength = (update_strength * state_strength
                   + (1 - update_strength) * candidate_strength)

    return newstate, newstrength
def get_relative_position(self, t, light_delay=False):
    """The planets' positions relative to the star

    Args:
        t: The times where the position should be evaluated.
        light_delay: Not supported; a true value raises
            ``NotImplementedError``.

    Returns:
        The components of the position vector at ``t`` in units of
        ``R_sun``.

    """
    if light_delay:
        raise NotImplementedError(
            "Light travel time delay is not implemented for simple orbits"
        )

    # Time since the reference time, wrapped to one period and shifted by
    # half a period.
    phase = tt.mod(tt.shape_padright(t) - self._ref_time, self.period)
    dt = phase - self._half_period

    in_transit = tt.abs_(dt) < 0.5 * self.duration

    x = tt.squeeze(self.speed * dt)
    y = tt.squeeze(self._b_norm + tt.zeros_like(dt))
    # +1 while |dt| is within half the duration, -1 otherwise.
    z = tt.squeeze(in_transit * 1.0 - (~in_transit) * 1.0)
    return x, y, z
def __init__(self, theta, alpha, kappa, gamma=0, sigma=1, *args, **kwargs):
    """Store the model parameters as tensors and precompute probabilities.

    The five parameters are converted to floatX tensor variables and kept
    both as ``self.param_list`` and, keyed by name, as ``self.params``.
    """
    super(GRMLike, self).__init__(*args, **kwargs)

    par_names = ['theta', 'alpha', 'kappa', 'gamma', 'sigma']
    self.param_list = [
        tt.as_tensor_variable(floatX(var))
        for var in (theta, alpha, kappa, gamma, sigma)
    ]
    self.params = dict(zip(par_names, self.param_list))

    self.cprobst, self.probst = self.__init_probs()

    # Set number of categories
    self.k = tt.shape(self.probst)[-1]

    # Compute mode for each response category
    self.mode = tt.argmax(self.probst, axis=-1)

    # Numpy fancy indexing to allow observed data to index
    # probability tensor: a column of row indices and a row of column
    # indices that broadcast against each other.
    row_idx = tt.shape_padright(tt.arange(self.probst.shape[0]))
    col_idx = tt.shape_padleft(tt.arange(self.probst.shape[1]))
    self.index = (row_idx, col_idx)
def __call__(self, x):
    """Apply the residual blocks and convolution layers to ``x``.

    Residual blocks ``self.res_kernels[0]`` run first; each layer ``i`` is
    followed by the blocks in ``self.res_kernels[i + 1]``. The result is
    returned as is for ``self.output == 'linear'`` and through a ReLU for
    ``'relu'``; any other setting returns None.
    """
    for k1, k2 in self.res_kernels[0]:
        inner = relu(T.nnet.conv2d(x, k1, border_mode='half'))
        x += T.nnet.conv2d(inner, k2, border_mode='half')

    stride = (self.downscale, self.downscale)
    for i, (k, b) in enumerate(zip(self.layer_kernels, self.layer_biases)):
        # No nonlinearity in front of the first layer.
        if i > 0:
            x = relu(x)

        if self.upscale > 1:
            # Repeat along each spatial axis, then drop the last row and the
            # last column.
            x = x.repeat(self.upscale, axis=2)[:, :, :-1]
            x = x.repeat(self.upscale, axis=3)[:, :, :, :-1]

        # The bias gets two trailing axes to broadcast over the spatial axes.
        x = T.nnet.conv2d(x, k, subsample=stride,
                          border_mode='half') + T.shape_padright(b, 2)

        for k1, k2 in self.res_kernels[i + 1]:
            inner = relu(T.nnet.conv2d(x, k1, border_mode='half'))
            x += T.nnet.conv2d(inner, k2, border_mode='half')

    if self.output == 'linear':
        return x
    if self.output == 'relu':
        return relu(x)
def get_light_curve(time, tpeaks, fwhms, ampls, texp=None, oversample=7):
    """Evaluate the multi-flare model at ``time``.

    Without ``texp`` the model is evaluated directly at the given times.
    With ``texp`` each time is replaced by ``oversample`` points (made odd if
    even) spread evenly over the exposure, and the model is averaged over
    them.
    """
    time = tt.as_tensor_variable(time.astype("float64"))

    if texp is None:
        return multiflaremodel(time, tpeaks, fwhms, ampls)

    # taking this oversample code from
    # https://github.com/dfm/exoplanet
    # and https://github.com/lkreidberg/batman
    oversample = int(oversample)
    oversample += 1 - oversample % 2  # bump even counts to the next odd one
    dt = np.linspace(-texp / 2.0, texp / 2.0, oversample)
    tgrid = tt.shape_padright(time) + dt

    multiflare_lc = multiflaremodel(tgrid, tpeaks, fwhms, ampls)

    # One row per input time, one column per sub-sample.
    return tt.mean(
        tt.reshape(multiflare_lc, (-1, oversample)),
        axis=1
    )
def __init__(self, input, image_shape, pool_size, sparse_count):
    """Build a sparse 3-D average pooling of ``input`` as a convolution.

    Max pooling is not implemented; averaging is done by convolving with a
    fixed mask in which consecutive taps along each axis are separated by
    ``sparse_count`` zeros. Sets ``self.pool_mask``, ``self.outputlen`` and
    ``self.output``.
    """
    oneZeros = numpy.concatenate(([1], numpy.zeros(sparse_count)))

    def _axis_taps(width):
        # (width - 1) repetitions of "1 followed by zeros", then a final 1.
        return numpy.insert(numpy.tile(oneZeros, width - 1),
                            (width - 1) * (len(oneZeros)), 1)

    x = _axis_taps(pool_size[0])
    y = _axis_taps(pool_size[1])
    z = _axis_taps(pool_size[2])
    mask_dims = (len(x), len(y), len(z))

    # Separable 3-D mask, lifted to 5-D and scaled so that the taps average.
    mask = numpy.outer(numpy.outer(x, y), z).reshape(*mask_dims)
    mask = numpy.ones((1, 1) + mask_dims) * mask
    self.pool_mask = mask.astype(
        theano.config.floatX) / numpy.prod(pool_size)

    # Fold the leading dimensions into one batch axis and add a singleton
    # axis in front of the three frame axes.
    frame_shape = input.shape[-3:]
    batch_size = T.shape_padright(T.prod(input.shape[:-3]), 1)
    new_shape = T.cast(
        T.join(0, batch_size, T.as_tensor([1, ]), frame_shape), 'int32')
    input_5d = T.reshape(input, new_shape, ndim=5)

    filter_shape = (1, 1) + mask_dims
    image_shape = (image_shape[0] * image_shape[1], 1, image_shape[2],
                   image_shape[3], image_shape[4])

    # conv3d gets axes 1 and 2 swapped on the way in; the same swap on the
    # way out undoes it.
    swap = [0, 2, 1, 3, 4]
    avg_out = conv3d(
        signals=input_5d.dimshuffle(swap),
        filters=self.pool_mask.transpose(*swap),
        signals_shape=[image_shape[i] for i in swap],
        filters_shape=[filter_shape[i] for i in swap],
        border_mode='valid').dimshuffle(swap)

    # Restore the original leading dimensions.
    outshp = T.join(0, input.shape[:-3], avg_out.shape[-3:])
    avg_out = T.reshape(avg_out, outshp, ndim=5)

    # 'valid' convolution shrinks each frame axis by (mask length - 1).
    self.outputlen = (image_shape[2] - len(x) + 1,
                      image_shape[3] - len(y) + 1,
                      image_shape[4] - len(z) + 1)
    self.output = avg_out
def _apply_index(data, indices, axis=0):
    """
    Index data along a single axis.

    Indices is a tensor of indices shaped like data minus the given axis.
    The result is a slice of data with the given axis removed: for each entry
    of the other dimensions, the given index for that axis is used to select
    the single item.

    _apply_index can be used to dereference the search results returned from
    tensor.argmax().
    """
    ndim = data.type.ndim
    shape = data.shape

    # Pad lower-rank indices on the right up to rank ndim - 1.
    missing_axes = ndim - indices.type.ndim - 1
    if indices.type.ndim < ndim - 1:
        indices = tensor.shape_padright(indices, n_ones=missing_axes)

    selectors = []
    for a in range(ndim):
        if a == axis:
            selectors.append(indices)
        elif a < axis:
            selectors.append(_axis_count(shape, a, ndim - 1))
        else:
            # Axes after the indexed one move down by one in the result.
            selectors.append(_axis_count(shape, a - 1, ndim - 1))
    return data[tuple(selectors)]
def __call__(self, target, context, mask=None):
    """Run the GRU decoder over ``target`` conditioned on ``context``.

    The target is masked, shifted one step to the right (a zero step is put
    in front, the last step is dropped) and fed through the recurrence. The
    return value is a softmax over the last axis of the predictions, with
    shape (batch_size, max_token_len, vocab_size).
    """
    target = target * T.cast(T.shape_padright(mask), 'float32')
    padded_mask = self.get_padded_shuffled_mask(mask, pad=1)

    # Shift right by one step.
    first_step = alloc_zeros_matrix(target.shape[0], 1, self.input_dim)
    X_shifted = T.concatenate([first_step, target[:, 0:-1, :]], axis=-2)

    # Time-major layout for scan; the context gets a broadcast time axis.
    X = X_shifted.dimshuffle((1, 0, 2))
    ctx_step = context.dimshuffle(('x', 0, 1))

    def _gate_input(W, C, b):
        # Input and context contributions of one gate, for all steps at once.
        return T.dot(X, W) + T.dot(ctx_step, C) + b

    x_z = _gate_input(self.W_z, self.C_z, self.b_z)
    x_r = _gate_input(self.W_r, self.C_r, self.b_r)
    x_h = _gate_input(self.W_h, self.C_h, self.b_h)

    initial_state = T.unbroadcast(
        alloc_zeros_matrix(X.shape[1], self.hidden_dim), 1)
    h, updates = theano.scan(
        self._step,
        sequences=[x_z, x_r, x_h, padded_mask],
        outputs_info=initial_state,
        non_sequences=[self.U_z, self.U_r, self.U_h])

    # (batch_size, max_token_len, hidden_dim)
    h = h.dimshuffle((1, 0, 2))

    # (batch_size, max_token_len, vocab_size)
    context_term = T.dot(context.dimshuffle((0, 'x', 1)), self.C_y)
    predicts = T.dot(h, self.U_y) + context_term + self.b_y

    # softmax works on 2-D input: flatten, normalise, restore the shape.
    predicts_flatten = predicts.reshape((-1, predicts.shape[2]))
    return T.nnet.softmax(predicts_flatten).reshape(
        (predicts.shape[0], predicts.shape[1], predicts.shape[2]))