Example #1
def _span_sums(stt, end, p_lens, max_p_len, batch_size, dim, max_ans_len):
  # Sum of every start element and corresponding max_ans_len end elements.
  #
  # stt     (max_p_len, batch_size, dim)
  # end     (max_p_len, batch_size, dim)
  # p_lens  (batch_size,)
  max_ans_len_range = tt.shape_padleft(tt.arange(max_ans_len))          # (1, max_ans_len)
  offsets = tt.shape_padright(tt.arange(max_p_len))                     # (max_p_len, 1)
  end_idxs = max_ans_len_range + offsets                                # (max_p_len, max_ans_len)
  end_idxs_flat = end_idxs.flatten()                                    # (max_p_len*max_ans_len,)

  end_padded = tt.concatenate(                                          # (max_p_len+max_ans_len-1, batch_size, dim)
    [end, tt.zeros((max_ans_len-1, batch_size, dim))], axis=0)    
  end_structured = end_padded[end_idxs_flat]                            # (max_p_len*max_ans_len, batch_size, dim)
  end_structured = end_structured.reshape(                              # (max_p_len, max_ans_len, batch_size, dim)
    (max_p_len, max_ans_len, batch_size, dim))
  stt_shuffled = stt.dimshuffle((0,'x',1,2))                            # (max_p_len, 1, batch_size, dim)

  span_sums = stt_shuffled + end_structured                             # (max_p_len, max_ans_len, batch_size, dim)
  span_sums_reshaped = span_sums.dimshuffle((2,0,1,3)).reshape(         # (batch_size, max_p_len*max_ans_len, dim)
    (batch_size, max_p_len*max_ans_len, dim))

  p_lens_shuffled = tt.shape_padright(p_lens)                           # (batch_size, 1)
  end_idxs_flat_shuffled = tt.shape_padleft(end_idxs_flat)              # (1, max_p_len*max_ans_len)

  span_masks_reshaped = tt.lt(end_idxs_flat_shuffled, p_lens_shuffled)  # (batch_size, max_p_len*max_ans_len)
  span_masks_reshaped = cast_floatX(span_masks_reshaped)

  # (batch_size, max_p_len*max_ans_len, dim), (batch_size, max_p_len*max_ans_len)
  return span_sums_reshaped, span_masks_reshaped
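A minimal numeric sketch of the broadcast index grid built above, assuming `tt` is `theano.tensor` and toy sizes (illustrative only):

import theano
import theano.tensor as tt

max_p_len, max_ans_len = 4, 3
ends = tt.shape_padleft(tt.arange(max_ans_len))  # (1, max_ans_len)
offs = tt.shape_padright(tt.arange(max_p_len))   # (max_p_len, 1)
grid = theano.function([], ends + offs)()        # broadcasts to (max_p_len, max_ans_len)
print(grid)
# [[0 1 2]
#  [1 2 3]
#  [2 3 4]
#  [3 4 5]]
# row i holds the candidate end indices for a span starting at token i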
Example #2
 def create_prediction(self):  # run one prediction pass
     gfs=self.gfs
     pm25in=self.pm25in
     # initial forward pass
     gfs_x=T.concatenate([gfs[:,0],gfs[:,1],gfs[:,2]],axis=1)
     pm25in_x=T.concatenate([pm25in[:,0],pm25in[:,1]],axis=1)
     self.layerstatus=self.model.forward(T.concatenate([gfs_x,pm25in_x,self.cnt[:,:,0]],axis=1))
     self.results=self.layerstatus[-1]
     for i in xrange(1,7):  # first 6 passes (0-5) process the 6 frames before output; the 7th pass is the 1st output
         gfs_x=T.concatenate([gfs_x[:,9:],gfs[:,i+2]],axis=1)
         pm25in_x=T.concatenate([pm25in_x[:,1:],pm25in[:,i+1]],axis=1)
         self.layerstatus=self.model.forward(T.concatenate([gfs_x,pm25in_x,self.cnt[:,:,i]],axis=1),self.layerstatus)
         self.results=T.concatenate([self.results,self.layerstatus[-1]],axis=1)
     if self.steps > 1:
         gfs_x=T.concatenate([gfs_x[:,9:],gfs[:,9]],axis=1)
         pm25in_x=T.concatenate([pm25in_x[:,1:],T.shape_padright(self.results[:,-1])],axis=1)
         self.layerstatus=self.model.forward(T.concatenate([gfs_x,pm25in_x,self.cnt[:,:,7]],axis=1),self.layerstatus)
         self.results=T.concatenate([self.results,self.layerstatus[-1]],axis=1)
         # the remaining steps-2 forward passes
         for i in xrange(2,self.steps):
             gfs_x=T.concatenate([gfs_x[:,9:],gfs[:,i+8]],axis=1)
             pm25in_x=T.concatenate([pm25in_x[:,1:],T.shape_padright(self.results[:,-1])],axis=1)
             self.layerstatus=self.model.forward(T.concatenate([gfs_x,pm25in_x,self.cnt[:,:,i+6]],axis=1),self.layerstatus)
             #need T.shape_padright???
             self.results=T.concatenate([self.results,self.layerstatus[-1]],axis=1)
     return self.results
Example #3
def maxpool_3D(input, ds, ignore_border=False):
   
    #input.dimshuffle (0, 2, 1, 3, 4)   # convert to make video in back. 
    # no need to reshuffle. 
    if input.ndim < 3:
        raise NotImplementedError('max_pool_3d requires a dimension >= 3')

    # extract nr dimensions
    vid_dim = input.ndim
    # max pool in two different steps, so we can use the 2d implementation of 
    # downsamplefactormax. First maxpool frames as usual. 
    # Then maxpool the time dimension. Shift the time dimension to the third 
    # position, so rows and cols are in the back


    # extract dimensions
    frame_shape = input.shape[-2:]
    
    # count the number of "leading" dimensions, store as dmatrix
    batch_size = T.prod(input.shape[:-2])
    batch_size = T.shape_padright(batch_size,1)
    
    # store as 4D tensor with shape: (batch_size,1,height,width)
    new_shape = T.cast(T.join(0, batch_size,
                                        T.as_tensor([1,]), 
                                        frame_shape), 'int32')
    input_4D = T.reshape(input, new_shape, ndim=4)

    # downsample mini-batch of videos in rows and cols
    op = DownsampleFactorMax((ds[1],ds[2]), ignore_border)          # so second and third dimensions of ds are for height and width
    output = op(input_4D)
    # restore to original shape                                     
    outshape = T.join(0, input.shape[:-2], output.shape[-2:])
    out = T.reshape(output, outshape, ndim=input.ndim)

    # now maxpool time
    # output (time, rows, cols), reshape so that time is in the back
    shufl = (list(range(vid_dim-3)) + [vid_dim-2]+[vid_dim-1]+[vid_dim-3])
    input_time = out.dimshuffle(shufl)
    # reset dimensions
    vid_shape = input_time.shape[-2:]
    
    # count the number of "leading" dimensions, store as dmatrix
    batch_size = T.prod(input_time.shape[:-2])
    batch_size = T.shape_padright(batch_size,1)
    
    # store as 4D tensor with shape: (batch_size,1,width,time)
    new_shape = T.cast(T.join(0, batch_size,
                                        T.as_tensor([1,]), 
                                        vid_shape), 'int32')
    input_4D_time = T.reshape(input_time, new_shape, ndim=4)
    # downsample mini-batch of videos in time
    op = DownsampleFactorMax((1,ds[0]), ignore_border)            # Here the time dimension is downsampled. 
    outtime = op(input_4D_time)
    # output 
    # restore to original shape (xxx, rows, cols, time)
    outshape = T.join(0, input_time.shape[:-2], outtime.shape[-2:])
    shufl = (list(range(vid_dim-3)) + [vid_dim-1]+[vid_dim-3]+[vid_dim-2])
    #rval = T.reshape(outtime, outshape, ndim=input.ndim).dimshuffle(shufl)
    return T.reshape(outtime, outshape, ndim=input.ndim).dimshuffle(shufl)
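A hypothetical shape check for the function above, assuming the snippet's module-level imports (`theano.tensor as T` and `DownsampleFactorMax` from the deprecated `theano.tensor.signal.downsample`) are in place:

import numpy as np
import theano
import theano.tensor as T

vid = T.TensorType('float64', (False,) * 5)()    # (batch, channel, time, row, col)
f = theano.function([vid], maxpool_3D(vid, ds=(2, 2, 2)))
print(f(np.random.rand(1, 1, 4, 6, 6)).shape)    # -> (1, 1, 2, 3, 3): time, rows, cols halved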
Example #4
def roc_curves(y_true, y_predicted):
    "returns roc curves calculated axis -1-wise"
    fps, tps, thresholds = _binary_clf_curves(y_true, y_predicted)
    last_col = _last_col_idx(y_true.ndim)
    fpr = fps.astype('float32') / T.shape_padright(fps[last_col], 1)
    tpr = tps.astype('float32') / T.shape_padright(tps[last_col], 1)
    return fpr, tpr, thresholds
Example #5
    def __init__(self, n, p, *args, **kwargs):
        super(Multinomial, self).__init__(*args, **kwargs)

        p = p / tt.sum(p, axis=-1, keepdims=True)
        n = np.squeeze(n) # works also if n is a tensor

        if len(self.shape) > 1:
            m = self.shape[-2]
            try:
                assert n.shape == (m,)
            except (AttributeError, AssertionError):
                n = n * tt.ones(m)
            self.n = tt.shape_padright(n)
            self.p = p if p.ndim > 1 else tt.shape_padleft(p)
        elif n.ndim == 1:
            self.n = tt.shape_padright(n)
            self.p = p if p.ndim > 1 else tt.shape_padleft(p)
        else:
            # n is a scalar, p is a 1d array
            self.n = tt.as_tensor_variable(n)
            self.p = tt.as_tensor_variable(p)

        self.mean = self.n * self.p
        mode = tt.cast(tt.round(self.mean), 'int32')
        diff = self.n - tt.sum(mode, axis=-1, keepdims=True)
        inc_bool_arr = tt.abs_(diff) > 0
        mode = tt.inc_subtensor(mode[inc_bool_arr.nonzero()],
                                diff[inc_bool_arr.nonzero()])
        self.mode = mode
Example #6
def getTheanoSimilarityFunction():
    """
    Return a theano function performing valid convolution of a filter on an
    image
    """
        
    # Define the input variables to the function
    patches = T.tensor3(dtype='float32') # AxBx(patchsize**2)
    filters = T.matrix(dtype='float32') # Cx(patchsize**2)
    globalMean = T.vector(dtype='float32')
    globalStd = T.vector(dtype='float32')
    
    # Perform canonical processing of the patches
    mean = T.shape_padright(patches.mean(2), n_ones=1)
    # (an earlier variant regularized the std with 0.1 * patches.std();
    # only the final assignment below takes effect)
    std = T.shape_padright(patches.std(2) + 1e-6, n_ones=1)
    canonicalPatches_ = (patches - mean) / std  
    canonicalPatches = (canonicalPatches_ - globalMean) / globalStd  

    # Compute the similarities between each patch and each filter
    similarities = T.tensordot(canonicalPatches, filters, axes=[[2],[1]]) # AxBxC
    
    normFactor = ((canonicalPatches** 2).sum(2) ** 0.5)
    normFactorPadded = T.shape_padright(normFactor, n_ones=1)
    
    # Normalize the similarities by the norm of the patches
    similaritiesNorm = (similarities / normFactorPadded)
    
    # Compile and return the theano function
    f = theano.function([patches, filters, globalMean, globalStd], 
                        similaritiesNorm, on_unused_input='ignore')
    return f
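A hypothetical call with toy shapes (A=2 images, B=5 patches, patchsize**2=9, C=4 filters); values are illustrative only:

import numpy as np

f = getTheanoSimilarityFunction()
patches = np.random.rand(2, 5, 9).astype('float32')
filters = np.random.rand(4, 9).astype('float32')
globalMean = np.zeros(9, dtype='float32')
globalStd = np.ones(9, dtype='float32')
print(f(patches, filters, globalMean, globalStd).shape)  # -> (2, 5, 4)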
Example #7
    def sym_mask_logdensity_estimator_intermediate(self, x, mask):
        non_linearity_name = self.parameters["nonlinearity"].get_name()
        assert non_linearity_name == "sigmoid" or non_linearity_name == "RLU"
        x = x.T  # BxD
        mask = mask.T  # BxD
        output_mask = constantX(1) - mask  # BxD
        D = constantX(self.n_visible)
        d = mask.sum(1)  # d is the 1-based index of the dimension whose value to infer (not the size of the context)
        masked_input = x * mask  # BxD
        h = self.nonlinearity(T.dot(masked_input, self.W1) + T.dot(mask, self.Wflags) + self.b1)  # BxH
        for l in xrange(self.n_layers - 1):
            h = self.nonlinearity(T.dot(h, self.Ws[l]) + self.bs[l])  # BxH
        z_alpha = T.tensordot(h, self.V_alpha, [[1], [1]]) + T.shape_padleft(self.b_alpha)
        z_mu = T.tensordot(h, self.V_mu, [[1], [1]]) + T.shape_padleft(self.b_mu)
        z_sigma = T.tensordot(h, self.V_sigma, [[1], [1]]) + T.shape_padleft(self.b_sigma)
        temp = T.exp(z_alpha)  # + 1e-6
        # temp += T.shape_padright(temp.sum(2)/1e-3)
        Alpha = temp / T.shape_padright(temp.sum(2))  # BxDxC
        Mu = z_mu  # BxDxC
        Sigma = T.exp(z_sigma)  # + 1e-6 #BxDxC

        # Alpha = Alpha * T.shape_padright(output_mask) + T.shape_padright(mask)
        # Mu = Mu * T.shape_padright(output_mask)
        # Sigma = Sigma * T.shape_padright(output_mask) + T.shape_padright(mask)
        # Phi = -constantX(0.5) * T.sqr((Mu - T.shape_padright(x*output_mask)) / Sigma) - T.log(Sigma) - constantX(0.5 * np.log(2*np.pi)) #BxDxC

        Phi = (
            -constantX(0.5) * T.sqr((Mu - T.shape_padright(x)) / Sigma)
            - T.log(Sigma)
            - constantX(0.5 * np.log(2 * np.pi))
        )  # BxDxC
        logdensity = (log_sum_exp(Phi + T.log(Alpha), axis=2) * output_mask).sum(1) * D / (D - d)
        return (logdensity, z_alpha, z_mu, z_sigma, Alpha, Mu, Sigma, h)
Example #8
 def create_prediction(self):  # run one prediction pass
     gfs = self.gfs
     pm25in = self.pm25in
     # initial forward pass
     self.layerstatus = self.model.forward(
         T.concatenate([gfs[:, 0], gfs[:, 1], gfs[:, 2], pm25in[:, 0], pm25in[:, 1], self.cnt[:, :, 0]], axis=1)
     )
     # results.shape?40*1
     self.results = self.layerstatus[-1]
     if self.steps > 1:
         self.layerstatus = self.model.forward(
             T.concatenate([gfs[:, 1], gfs[:, 2], gfs[:, 3], pm25in[:, 1], self.results, self.cnt[:, :, 1]], axis=1),
             self.layerstatus,
         )
         self.results = T.concatenate([self.results, self.layerstatus[-1]], axis=1)
         # the remaining steps-2 forward passes
         for i in xrange(2, self.steps):
             self.layerstatus = self.model.forward(
                 T.concatenate(
                     [
                         gfs[:, i],
                         gfs[:, i + 1],
                         gfs[:, i + 2],
                         T.shape_padright(self.results[:, i - 2]),
                         T.shape_padright(self.results[:, i - 1]),
                         self.cnt[:, :, i],
                     ],
                     axis=1,
                 ),
                 self.layerstatus,
             )
             # need T.shape_padright???
             self.results = T.concatenate([self.results, self.layerstatus[-1]], axis=1)
     return self.results
Example #9
File: ttv.py  Project: dfm/exoplanet
 def _warp_times(self, t):
     delta = tt.shape_padleft(t) / tt.shape_padright(self.period, t.ndim)
     delta += tt.shape_padright(self._base_time, t.ndim)
     ind = tt.cast(tt.floor(delta), "int64")
     dt = tt.stack([ttv[tt.clip(ind[i], 0, ttv.shape[0]-1)]
                    for i, ttv in enumerate(self.ttvs)], -1)
     return tt.shape_padright(t) + dt
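A shape-only numpy sketch of the broadcasting in `_warp_times` for 1-D `t` (hypothetical values; the `None` indexing mirrors `shape_padleft`/`shape_padright`):

import numpy as np

t = np.linspace(0.0, 10.0, 5)          # (n_times,)
period = np.array([2.0, 3.0])          # (n_planets,)
delta = t[None, :] / period[:, None]   # (n_planets, n_times), like padleft(t) / padright(period)
ind = np.floor(delta).astype('int64')  # transit index per planet at each time
print(ind.shape)                       # (2, 5)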
Example #10
    def prediction(self, h, bias):
        srng = RandomStreams(seed=42)

        prop, mean_x, mean_y, std_x, std_y, rho, bernoulli = \
            self.compute_parameters(h, bias)

        mode = T.argmax(srng.multinomial(pvals=prop, dtype=prop.dtype), axis=1)

        v = T.arange(0, mean_x.shape[0])
        m_x = mean_x[v, mode]
        m_y = mean_y[v, mode]
        s_x = std_x[v, mode]
        s_y = std_y[v, mode]
        r = rho[v, mode]
        # cov = r * (s_x * s_y)

        normal = srng.normal((h.shape[0], 2))
        x = normal[:, 0]
        y = normal[:, 1]

        # x_n = T.shape_padright(s_x * x + cov * y + m_x)
        # y_n = T.shape_padright(s_y * y + cov * x + m_y)

        x_n = T.shape_padright(m_x + s_x * x)
        y_n = T.shape_padright(m_y + s_y * (x * r + y * T.sqrt(1.-r**2)))

        uniform = srng.uniform((h.shape[0],))
        pin = T.shape_padright(T.cast(bernoulli > uniform, floatX))

        return T.concatenate([x_n, y_n, pin], axis=1)
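A numpy sanity check of the correlated-sample construction in `y_n` above: for independent standard normals x and y, `x*r + y*sqrt(1-r**2)` has correlation r with x (illustrative only):

import numpy as np

rng = np.random.default_rng(0)
x, y = rng.standard_normal((2, 100000))
r = 0.7
z = x * r + y * np.sqrt(1. - r ** 2)
print(np.corrcoef(x, z)[0, 1])  # approximately 0.7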
Example #11
File: impulse.py  Project: mmyros/pyglm
 def filter_spike_train(n,S,taus):
     """ Helper function to filter the spike train
     """
     filt = T.shape_padright(filt_fn(taus[n]), n_ones=1)
     filtered_S = conv2d(T.shape_padright(S[:,n], n_ones=1), 
                         filt, 
                         border_mode='full')
     return filtered_S[0,:,0]
Example #12
File: crf.py  Project: tbepler/rnn
 def __call__(self, crf, X, Y, mask=None, flank=0):
     Yh = self.decode(crf, X, Y)
     L = self.loss(Yh, Y)
     C = confusion(T.argmax(Yh,axis=-1), Y, Yh.shape[-1])
     if mask is not None:
         L *= T.shape_padright(mask)
         C *= T.shape_padright(T.shape_padright(mask))
     n = Yh.shape[0]
     return L[flank:n-flank], C[flank:n-flank]
Example #13
 def dfe_dlhat(self, g_hat, h_hat, l_hat, v):
     # term from loss function
     dloss_dl = self.label_multiplier * (T.dot(h_hat, self.Whl) + self.lbias)
     rval = dloss_dl * l_hat - l_hat * T.shape_padright(T.sum(l_hat * dloss_dl, axis=1))
     # term from entropy.
     # dentropy = T.sum(-l_hat * T.log(l_hat), axis=1)
     dentropy = - T.xlogx.xlogx(l_hat) - l_hat +\
                  l_hat * T.shape_padright(T.sum(T.xlogx.xlogx(l_hat) + l_hat, axis=1))
     return rval + dentropy
Example #14
File: charrnn.py  Project: tbepler/rnn
 def _theano_confusion(self, Yh, Y, mask):
     Yh = T.argmax(Yh, axis=-1)
     shape = list(Yh.shape) + [self.n_out, self.n_out]
     C = T.zeros(shape, dtype='int64')
     i,j = T.mgrid[0:C.shape[0], 0:C.shape[1]]
     C = T.set_subtensor(C[i,j,Y,Yh], 1)
     mask = T.shape_padright(T.shape_padright(mask))
     C = C*mask
     return C
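A toy check of the one-hot confusion construction above (time=2, batch=2, n_out=3 are hypothetical sizes; `T` is `theano.tensor`):

import numpy as np
import theano
import theano.tensor as T

Y, Yh = T.lmatrix('Y'), T.lmatrix('Yh')
C = T.zeros((Y.shape[0], Y.shape[1], 3, 3), dtype='int64')
i, j = T.mgrid[0:C.shape[0], 0:C.shape[1]]
C = T.set_subtensor(C[i, j, Y, Yh], 1)
f = theano.function([Y, Yh], C)
out = f(np.array([[0, 1], [2, 2]]), np.array([[0, 2], [2, 1]]))
print(out[0, 1])  # cell [true=1, pred=2] is 1, all others 0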
Example #15
        def density_given_previous_a_and_x(x, w, V_alpha, b_alpha, V_mu, b_mu, V_sigma, b_sigma, activations_factor, p_prev, a_prev, x_prev):
            a = a_prev + T.dot(T.shape_padright(x_prev, 1), T.shape_padleft(w, 1))
            h = self.nonlinearity(a * activations_factor)  # BxH

            Alpha = T.nnet.softmax(T.dot(h, V_alpha) + T.shape_padleft(b_alpha))  # BxC
            Mu = T.dot(h, V_mu) + T.shape_padleft(b_mu)  # BxC
            Sigma = T.exp((T.dot(h, V_sigma) + T.shape_padleft(b_sigma)))  # BxC
            p = p_prev + log_sum_exp(T.log(Alpha) - T.log(2 * Sigma) - T.abs_(Mu - T.shape_padright(x, 1)) / Sigma)
            return (p, a, x)
Example #16
File: topic_lstm.py  Project: tbepler/rnn
 def loss(self, X, mask=None, flank=0, Z=None):
     if Z is None:
         Z = self.transform(self.noise(X), mask=mask)
     E = self.emit(Z)
     L = cross_entropy(E, X)
     C = confusion(T.argmax(E,axis=-1), X, E.shape[-1])
     if mask is not None:
         L *= T.shape_padright(mask)
         C *= T.shape_padright(T.shape_padright(mask))
     n = X.shape[0]
     return L[flank:n-flank], C[flank:n-flank]
Example #17
def max_pool_3d(input, ds, ignore_border=False):
	"""
		Takes as input a N-D tensor, where N >= 3. It downscales the input video by
		the specified factor, by keeping only the maximum value of non-overlapping
		patches of size (ds[0],ds[1],ds[2]) (time, height, width)  
		
		:type input: N-D theano tensor of input images.
		:param input: input images. Max pooling will be done over the 3 last dimensions.
		:type ds: tuple of length 3
		:param ds: factor by which to downscale. (2,2,2) will halve the video in each dimension.
		:param ignore_border: boolean value. When True, a (5,5,5) input with ds=(2,2,2) will generate a
		(2,2,2) output; (3,3,3) otherwise.
	"""
	if input.ndim < 3:
		raise NotImplementedError('max_pool_3d requires a dimension >= 3')
		
	vid_dim = input.ndim
	#Maxpool frame
	frame_shape = input.shape[-2:]

	# count the number of "leading" dimensions, store as dmatrix
	batch_size = T.prod(input.shape[:-2])
	batch_size = T.shape_padright(batch_size,1)
	new_shape = T.cast(T.join(0, batch_size,T.as_tensor([1,]),frame_shape), 'int32')
	
	input_4D = T.reshape(input, new_shape, ndim=4)
	# downsample mini-batch of videos in rows and cols
	op = DownsampleFactorMax((ds[1],ds[2]), ignore_border)
	output = op(input_4D)
	# restore to original shape
	outshape = T.join(0, input.shape[:-2], output.shape[-2:])
	out = T.reshape(output, outshape, ndim=input.ndim)
	
	#Maxpool time 
	# output (time, rows, cols), reshape so that time is in the back
	shufl = (list(range(vid_dim-4)) + list(range(vid_dim-3,vid_dim))+[vid_dim-4])
	input_time = out.dimshuffle(shufl)
	# reset dimensions
	vid_shape = input_time.shape[-2:]
	# count the number of "leading" dimensions, store as dmatrix
	batch_size = T.prod(input_time.shape[:-2])
	batch_size = T.shape_padright(batch_size,1)
	# store as 4D tensor with shape: (batch_size,1,width,time)
	new_shape = T.cast(T.join(0, batch_size,T.as_tensor([1,]),vid_shape), 'int32')
	input_4D_time = T.reshape(input_time, new_shape, ndim=4)
	# downsample mini-batch of videos in time
	op = DownsampleFactorMax((1,ds[0]), ignore_border)
	outtime = op(input_4D_time)
	# restore to original shape (xxx, rows, cols, time)
	outshape = T.join(0, input_time.shape[:-2], outtime.shape[-2:])
	shufl = (list(range(vid_dim-4)) + [vid_dim-1] + list(range(vid_dim-4,vid_dim-1)))
	#shufl = (list(range(vid_dim-3)) + [vid_dim-1]+[vid_dim-3]+[vid_dim-2])
	return T.reshape(outtime, outshape, ndim=input.ndim).dimshuffle(shufl)
Example #18
    def apply(self, input_, application_call):
        """Apply the linear transformation followed by masking with noise.
        Parameters
        ----------
        input_ : :class:`~tensor.TensorVariable`
            The input on which to apply the transformations
        Returns
        -------
        output : :class:`~tensor.TensorVariable`
            The transformed input
        """

        # When not in training mode, turn off noise
        if not self._training_mode:
            return input_

        if self.tied_sigma:
            average = tensor.shape_padright(self.flatten.apply(input_), 2)
            noise_level = (self.prior_noise_level -
                    tensor.clip(self.mask.apply(average), -16, 16))
            noise_level = tensor.patternbroadcast(noise_level,
                    (False, False, True, True))
            noise_level = copy_and_tag_noise(
                    noise_level, self, LOG_SIGMA, 'log_sigma')
        else:
            average = input_
            noise_level = (self.prior_noise_level -
                    tensor.clip(self.mask.apply(input_), -16, 16))
            noise_level = copy_and_tag_noise(
                    noise_level, self, LOG_SIGMA, 'log_sigma')
        # Allow incomplete batches by just taking the noise that is needed
        if self.tied_noise:
            if self.noise_batch_size is not None:
                noise = self.parameters[0][:input_.shape[0], :]
            else:
                noise = self.theano_rng.normal(input_.shape[0:2])
            noise = tensor.shape_padright(noise, 2)  # pad to 4D so the noise broadcasts over the spatial axes
        else:
            if self.noise_batch_size is not None:
                noise = self.parameters[0][:input_.shape[0], :, :, :]
            else:
                noise = self.theano_rng.normal(input_.shape)
        kl = (
            self.prior_noise_level - noise_level
            + 0.5 * (
                tensor.exp(2 * noise_level)
                + (average - self.prior_mean) ** 2
                ) / tensor.exp(2 * self.prior_noise_level)
            - 0.5
            )
        application_call.add_auxiliary_variable(kl, roles=[NITS], name='nits')
        return input_ + self.noise_rate * tensor.exp(noise_level) * noise
Example #19
    def __init__(self, eta, cutpoints, *args, **kwargs):
        self.eta = tt.as_tensor_variable(eta)
        self.cutpoints = tt.as_tensor_variable(cutpoints)

        pa = sigmoid(tt.shape_padleft(self.cutpoints) - tt.shape_padright(self.eta))
        p_cum = tt.concatenate([
            tt.zeros_like(tt.shape_padright(pa[:, 0])),
            pa,
            tt.ones_like(tt.shape_padright(pa[:, 0]))
        ], axis=1)
        p = p_cum[:, 1:] - p_cum[:, :-1]

        super().__init__(p=p, *args, **kwargs)
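A numpy check of the cutpoint construction above: category probabilities are consecutive differences of sigmoid(cutpoint - eta), padded with 0 and 1, so each row sums to one (illustrative values):

import numpy as np

sigmoid = lambda z: 1.0 / (1.0 + np.exp(-z))
eta = np.array([0.5, -1.0])                      # (batch,)
cutpoints = np.array([-1.0, 0.0, 1.0])           # (K-1,)
pa = sigmoid(cutpoints[None, :] - eta[:, None])  # padleft(cutpoints) - padright(eta)
p_cum = np.concatenate([np.zeros((2, 1)), pa, np.ones((2, 1))], axis=1)
p = p_cum[:, 1:] - p_cum[:, :-1]                 # (batch, K) category probabilities
print(p.sum(axis=1))                             # [1. 1.]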
Example #20
 def density_given_previous_a_and_x(x, w, V_alpha, b_alpha, V_mu, b_mu, V_sigma, b_sigma,activation_factor, p_prev, a_prev, x_prev,):
     a = a_prev + T.dot(T.shape_padright(x_prev, 1), T.shape_padleft(w, 1))
     h = self.nonlinearity(a * activation_factor)  # BxH
     Alpha = T.nnet.softmax(T.dot(h, V_alpha) + b_alpha)  # BxC
     #Alpha = theano.printing.Print('Alpha')(Alpha)
     Mu = T.dot(h, V_mu) + b_mu  # BxC
     #Mu = theano.printing.Print('Mu')(Mu)
     Sigma = T.exp(T.dot(h, V_sigma) + b_sigma)  # BxC
     #Sigma = theano.printing.Print('Sigma')(Sigma)
     arg = -constantX(0.5) * T.sqr((Mu - T.shape_padright(x, 1)) / Sigma) - T.log(Sigma) - constantX(0.5 * numpy.log(2 * numpy.pi)) + T.log(Alpha)
     #arg = theano.printing.Print('Mu')(arg)
     p = p_prev + log_sum_exp(arg)
     return (p, a, x)
Example #21
        def density_and_gradients(x_i, x_im1, w_i, V_alpha, b_alpha, V_mu, b_mu, V_sigma, b_sigma, activation_factor, a_i, lp_accum, dP_da_ip1):
            B = T.cast(x_i.shape[0], theano.config.floatX)
            pot = a_i * activation_factor
            h = self.nonlinearity(pot)  # BxH

            z_alpha = T.dot(h, V_alpha) + T.shape_padleft(b_alpha)
            z_mu = T.dot(h, V_mu) + T.shape_padleft(b_mu)
            z_sigma = T.dot(h, V_sigma) + T.shape_padleft(b_sigma)

            Alpha = T.nnet.softmax(z_alpha)  # BxC
            Mu = z_mu  # BxC
            Sigma = T.exp(z_sigma)  # BxC

            Phi = -T.log(2 * Sigma) - T.abs_(Mu - T.shape_padright(x_i, 1)) / Sigma
            wPhi = T.maximum(Phi + T.log(Alpha), constantX(-100.0))

            lp_current = log_sum_exp(wPhi)
            # lp_current_sum = T.sum(lp_current)

            Pi = T.exp(wPhi - T.shape_padright(lp_current, 1))  # #
            dp_dz_alpha = Pi - Alpha  # BxC
            # dp_dz_alpha = T.grad(lp_current_sum, z_alpha)
            gb_alpha = dp_dz_alpha.mean(0, dtype=theano.config.floatX)  # C
            gV_alpha = T.dot(h.T, dp_dz_alpha) / B  # HxC

            # dp_dz_mu = T.grad(lp_current_sum, z_mu)
            dp_dz_mu = Pi * T.sgn(T.shape_padright(x_i, 1) - Mu) / Sigma
            # dp_dz_mu = dp_dz_mu * Sigma
            gb_mu = dp_dz_mu.mean(0, dtype=theano.config.floatX)
            gV_mu = T.dot(h.T, dp_dz_mu) / B

            # dp_dz_sigma = T.grad(lp_current_sum, z_sigma)
            dp_dz_sigma = Pi * (T.abs_(T.shape_padright(x_i, 1) - Mu) / Sigma - 1)
            gb_sigma = dp_dz_sigma.mean(0, dtype=theano.config.floatX)
            gV_sigma = T.dot(h.T, dp_dz_sigma) / B

            dp_dh = T.dot(dp_dz_alpha, V_alpha.T) + T.dot(dp_dz_mu, V_mu.T) + T.dot(dp_dz_sigma, V_sigma.T)  # BxH
            if non_linearity_name == "sigmoid":
                dp_dpot = dp_dh * h * (1 - h)
            elif non_linearity_name == "RLU":
                dp_dpot = dp_dh * (pot > 0)

            gfact = (dp_dpot * a_i).sum(1).mean(0, dtype=theano.config.floatX)  # 1

            dP_da_i = dP_da_ip1 + dp_dpot * activation_factor  # BxH
            gW = T.dot(T.shape_padleft(x_im1, 1), dP_da_i).flatten() / B

            return (a_i - T.dot(T.shape_padright(x_im1, 1), T.shape_padleft(w_i, 1)),
                    lp_accum + lp_current,
                    dP_da_i,
                    gW, gb_alpha, gV_alpha, gb_mu, gV_mu, gb_sigma, gV_sigma, gfact)
Example #22
File: RNADE.py  Project: sidsig/RNADE
        def density_and_gradients(x_i, x_im1, w_i, V_alpha, b_alpha, V_mu, b_mu, V_sigma, b_sigma, activation_factor, a_i, lp_accum, dP_da_ip1):
            B = T.cast(x_i.shape[0], floatX)
            pot = a_i * activation_factor
            h = self.nonlinearity(pot)  # BxH

            z_alpha = T.dot(h, V_alpha) + T.shape_padleft(b_alpha)
            z_mu = T.dot(h, V_mu) + T.shape_padleft(b_mu)
            z_sigma = T.dot(h, V_sigma) + T.shape_padleft(b_sigma)

            Alpha = T.nnet.softmax(z_alpha)  # BxC
            Mu = z_mu  # BxC
            Sigma = T.exp(z_sigma)  # BxC

            Phi = -constantX(0.5) * T.sqr((Mu - T.shape_padright(x_i, 1)) / Sigma) - T.log(Sigma) - constantX(0.5 * numpy.log(2 * numpy.pi))
            wPhi = T.maximum(Phi + T.log(Alpha), constantX(-100.0))

            lp_current = -log_sum_exp(wPhi)  # negative log likelihood
            # lp_current_sum = T.sum(lp_current)

            Pi = T.exp(wPhi - T.shape_padright(lp_current, 1))  # #
            dp_dz_alpha = Pi - Alpha  # BxC
            # dp_dz_alpha = T.grad(lp_current_sum, z_alpha)
            gb_alpha = dp_dz_alpha.mean(0, dtype=floatX)  # C
            gV_alpha = T.dot(h.T, dp_dz_alpha) / B  # HxC

            dp_dz_mu = -Pi * (Mu - T.shape_padright(x_i, 1)) / T.sqr(Sigma)
            # dp_dz_mu = T.grad(lp_current_sum, z_mu)
            dp_dz_mu = dp_dz_mu * Sigma  # Heuristic
            gb_mu = dp_dz_mu.mean(0, dtype=floatX)
            gV_mu = T.dot(h.T, dp_dz_mu) / B

            dp_dz_sigma = Pi * (T.sqr(T.shape_padright(x_i, 1) - Mu) / T.sqr(Sigma) - 1)
            # dp_dz_sigma = T.grad(lp_current_sum, z_sigma)
            gb_sigma = dp_dz_sigma.mean(0, dtype=floatX)
            gV_sigma = T.dot(h.T, dp_dz_sigma) / B

            dp_dh = T.dot(dp_dz_alpha, V_alpha.T) + T.dot(dp_dz_mu, V_mu.T) + T.dot(dp_dz_sigma, V_sigma.T)  # BxH
            if self.hidden_act == "sigmoid":
                dp_dpot = dp_dh * h * (1 - h)
            elif self.hidden_act == "ReLU":
                dp_dpot = dp_dh * (pot > 0)

            gfact = (dp_dpot * a_i).sum(1).mean(0, dtype=floatX)  # 1

            dP_da_i = dP_da_ip1 + dp_dpot * activation_factor  # BxH
            gW = T.dot(T.shape_padleft(x_im1, 1), dP_da_i).flatten() / B

            return (a_i - T.dot(T.shape_padright(x_im1, 1), T.shape_padleft(w_i, 1)),
                    lp_accum + lp_current,
                    dP_da_i,
                    gW, gb_alpha, gV_alpha, gb_mu, gV_mu, gb_sigma, gV_sigma, gfact)
Example #23
 def create_prediction(self):  # run one prediction pass
     gfs=self.gfs
     pm25in=self.pm25in
     # initial forward pass
     self.layerstatus=self.model.forward(T.concatenate([gfs[0],gfs[1],gfs[2],pm25in[0],pm25in[1]],axis=0))
     self.results=T.shape_padright(self.layerstatus[-1])
     if self.steps > 1:
         self.layerstatus=self.model.forward(T.concatenate([gfs[1],gfs[2],gfs[3],pm25in[1],self.results[0]],axis=0),self.layerstatus)
         self.results=T.concatenate([self.results,T.shape_padright(self.layerstatus[-1])],axis=0)      
         # the remaining steps-2 forward passes
         for i in xrange(2,self.steps):
             self.layerstatus=self.model.forward(T.concatenate([gfs[i],gfs[i+1],gfs[i+2],self.results[i-2],self.results[i-1]],axis=0),self.layerstatus)
             #need T.shape_padright???
             self.results=T.concatenate([self.results,T.shape_padright(self.layerstatus[-1])],axis=0)
     return self.results
Example #24
    def decode_to_probs(self, activations, relative_position, low_bound, high_bound):
        assert (low_bound%12==0) and (high_bound-low_bound == self.num_octaves*12), "Circle of thirds must evenly divide into octaves"
        squashed = T.reshape(activations, (-1,self.RAW_ENCODING_WIDTH))

        rsp = T.nnet.softmax(squashed[:,:3])
        c1 = T.nnet.softmax(squashed[:,3:7])
        c2 = T.nnet.softmax(squashed[:,7:10])
        octave_choice = T.nnet.softmax(squashed[:,10:])
        octave_notes = T.tile(c1,(1,3)) * T.tile(c2,(1,4))
        full_notes = T.reshape(T.shape_padright(octave_choice) * T.shape_padaxis(octave_notes, 1), (-1,12*self.num_octaves))
        full_probs = T.concatenate([rsp[:,:2], T.shape_padright(rsp[:,2])*full_notes], 1)

        newshape = T.concatenate([activations.shape[:-1],[2+high_bound-low_bound]],0)
        fixed = T.reshape(full_probs, newshape, ndim=activations.ndim)
        return fixed
Example #25
File: __pool.py  Project: kanairen/CubicCNN
def max_pool_3d(input, ds, ignore_border=False):
    # only inputs of the form [n, c, x, y, z] are accepted
    if input.ndim != 5:
        raise NotImplementedError(
            'max_pool_3d requires a input [n, c, x, y, z]')

    # input dimensionality
    vid_dim = input.ndim

    # shape of the [y, z] frame dimensions
    frame_shape = input.shape[-2:]

    # batch size:
    # multiply together the element counts of all dimensions except the frame dimensions
    batch_size = T.prod(input.shape[:-2])
    # http://deeplearning.net/software/theano/library/tensor/basic.html#theano.tensor.shape_padright
    batch_size = T.shape_padright(batch_size, 1)


    new_shape = T.cast(T.join(0, batch_size, T.as_tensor([1, ]), frame_shape),
                       'int32')
    input_4D = T.reshape(input, new_shape, ndim=4)

    op = DownsampleFactorMax((ds[1], ds[2]), ignore_border)
    output = op(input_4D)
    outshape = T.join(0, input.shape[:-2], output.shape[-2:])
    out = T.reshape(output, outshape, ndim=input.ndim)

    shufl = (
        list(range(vid_dim - 3)) + [vid_dim - 2] + [vid_dim - 1] + [
            vid_dim - 3])
    input_time = out.dimshuffle(shufl)
    vid_shape = input_time.shape[-2:]

    batch_size = T.prod(input_time.shape[:-2])
    batch_size = T.shape_padright(batch_size, 1)

    new_shape = T.cast(T.join(0, batch_size,
                              T.as_tensor([1, ]),
                              vid_shape), 'int32')
    input_4D_time = T.reshape(input_time, new_shape, ndim=4)
    op = DownsampleFactorMax((1, ds[0]), ignore_border)
    outtime = op(input_4D_time)
    outshape = T.join(0, input_time.shape[:-2], outtime.shape[-2:])
    shufl = (
        list(range(vid_dim - 3)) + [vid_dim - 1] + [vid_dim - 3] + [
            vid_dim - 2])
    return T.reshape(outtime, outshape, ndim=input.ndim).dimshuffle(shufl)
Example #26
def _create_maximum_activation_update(output, record, streamindex, topn):
    """
    Calculates update of the topn maximums for one batch of outputs.
    """
    dims, maximums, indices, snapshot = record
    counters = tensor.tile(tensor.shape_padright(
        tensor.arange(output.shape[0]) + streamindex), (1, output.shape[1]))
    if len(dims) == 1:
        # output is a 2d tensor, (cases, units) -> activation
        tmax = output
        # counters is a 2d tensor broadcastable (cases, units) -> case_index
        tind = counters
    else:
        # output is a 4d tensor: fmax flattens it to 3d
        fmax = output.flatten(ndim=3)
        # fargmax is a 2d tensor containing rolled maximum locations
        fargmax = fmax.argmax(axis=2)
        # fetch the maximum. tmax is 2d, (cases, units) -> activation
        tmax = _apply_index(fmax, fargmax, axis=2)
        # targmax is a tuple that separates rolled-up location into (x, y)
        targmax = divmod(fargmax, dims[2])
        # tind is a 3d tensor (cases, units, 3) -> case_index, maxloc
        # this will match indices which is a 3d tensor also
        tind = tensor.stack((counters, ) + targmax, axis=2)
    cmax = tensor.concatenate((maximums, tmax), axis=0)
    cind = tensor.concatenate((indices, tind), axis=0)
    cargsort = (-cmax).argsort(axis=0)[:topn]
    newmax = _apply_perm(cmax, cargsort, axis=0)
    newind = _apply_perm(cind, cargsort, axis=0)
    updates = [(maximums, newmax), (indices, newind)]
    if snapshot:
        csnap = tensor.concatenate((snapshot, output), axis=0)
        newsnap = _apply_perm(csnap, cargsort, axis=0)
        updates.append((snapshot, newsnap))
    return updates
Example #27
File: rnn.py  Project: JimStearns206/taxi
    def predict_all(self, latitude, longitude, latitude_mask, **kwargs):
        latitude = (latitude - data.train_gps_mean[0]) / data.train_gps_std[0]
        longitude = (longitude - data.train_gps_mean[1]) / data.train_gps_std[1]

        pre_emb = tuple(self.pre_context_embedder.apply(**kwargs))
        latitude = tensor.shape_padright(latitude)
        longitude = tensor.shape_padright(longitude)
        itr = self.input_to_rec.apply(tensor.concatenate(pre_emb + (latitude, longitude), axis=1))
        itr = itr.repeat(4, axis=1)
        (next_states, next_cells) = self.rec.apply(itr, kwargs['states'], kwargs['cells'], mask=latitude_mask, iterate=False)

        post_emb = tuple(self.post_context_embedder.apply(**kwargs))
        rto = self.rec_to_output.apply(tensor.concatenate(post_emb + (next_states,), axis=1))

        rto = (rto * data.train_gps_std) + data.train_gps_mean
        return (rto, next_states, next_cells)
Example #28
 def cost(self):
   """
   :rtype: (theano.Variable | None, dict[theano.Variable,theano.Variable] | None)
   :returns: cost, known_grads
   """
   known_grads = None
   if self.loss == 'ce' or self.loss == 'priori':
     if self.attrs.get("target", "").endswith("[sparse:coo]"):
       assert isinstance(self.y, tuple)
       assert len(self.y) == 3
       from NativeOp import crossentropy_softmax_and_gradient_z_sparse
       y_mask = self.network.j[self.attrs.get("target", "").replace("[sparse:coo]", "[sparse:coo:2:0]")]
       ce, grad_z = crossentropy_softmax_and_gradient_z_sparse(
         self.z, self.index, self.y[0], self.y[1], self.y[2], y_mask)
       return self.norm * T.sum(ce), {self.z: grad_z}
     if self.y_data_flat.type == T.ivector().type:
       # Use crossentropy_softmax_1hot to have a more stable and more optimized gradient calculation.
       # Theano fails to use it automatically; I guess our self.i indexing is too confusing.
       #idx = self.index.flatten().dimshuffle(0,'x').repeat(self.y_m.shape[1],axis=1) # faster than line below
       #nll, pcx = T.nnet.crossentropy_softmax_1hot(x=self.y_m * idx, y_idx=self.y_data_flat * self.index.flatten())
       nll, pcx = T.nnet.crossentropy_softmax_1hot(x=self.y_m[self.i], y_idx=self.y_data_flat[self.i])
       #nll, pcx = T.nnet.crossentropy_softmax_1hot(x=self.y_m, y_idx=self.y_data_flat)
       #nll = -T.log(T.nnet.softmax(self.y_m)[self.i,self.y_data_flat[self.i]])
       #z_c = T.exp(self.z[:,self.y])
       #nll = -T.log(z_c / T.sum(z_c,axis=2,keepdims=True))
       #nll, pcx = T.nnet.crossentropy_softmax_1hot(x=self.y_m, y_idx=self.y_data_flat)
       #nll = T.set_subtensor(nll[self.j], T.constant(0.0))
     else:
       nll = -T.dot(T.log(T.clip(self.p_y_given_x[self.i], 1.e-38, 1.e20)), self.y_data_flat[self.i].T)
     return self.norm * T.sum(nll), known_grads
   elif self.loss == 'entropy':
     h_e = T.exp(self.y_m) #(TB)
     pcx = T.clip((h_e / T.sum(h_e, axis=1, keepdims=True)).reshape((self.index.shape[0],self.index.shape[1],self.attrs['n_out'])), 1.e-6, 1.e6) # TBD
     ee = -T.sum(pcx[self.i] * T.log(pcx[self.i])) # TB
     #nll, pcxs = T.nnet.crossentropy_softmax_1hot(x=self.y_m[self.i], y_idx=self.y[self.i])
     nll, _ = T.nnet.crossentropy_softmax_1hot(x=self.y_m, y_idx=self.y_data_flat) # TB
     ce = nll.reshape(self.index.shape) * self.index # TB
     y = self.y_data_flat.reshape(self.index.shape) * self.index # TB
     f = T.any(T.gt(y,0), axis=0) # B
     return T.sum(f * T.sum(ce, axis=0) + (1-f) * T.sum(ee, axis=0)), known_grads
     #return T.sum(T.switch(T.gt(T.sum(y,axis=0),0), T.sum(ce, axis=0), -T.sum(ee, axis=0))), known_grads
     #return T.switch(T.gt(T.sum(self.y_m[self.i]),0), T.sum(nll), -T.sum(pcx * T.log(pcx))), known_grads
   elif self.loss == 'priori':
     pcx = self.p_y_given_x[self.i, self.y_data_flat[self.i]]
     pcx = T.clip(pcx, 1.e-38, 1.e20)  # For pcx near zero, the gradient will likely explode.
     return -T.sum(T.log(pcx)), known_grads
   elif self.loss == 'sse':
     if self.y_data_flat.dtype.startswith('int'):
       y_f = T.cast(T.reshape(self.y_data_flat, (self.y_data_flat.shape[0] * self.y_data_flat.shape[1]), ndim=1), 'int32')
       y_oh = T.eq(T.shape_padleft(T.arange(self.attrs['n_out']), y_f.ndim), T.shape_padright(y_f, 1))
       return T.mean(T.sqr(self.p_y_given_x[self.i] - y_oh[self.i])), known_grads
     else:
       #return T.sum(T.sum(T.sqr(self.y_m - self.y.reshape(self.y_m.shape)), axis=1)[self.i]), known_grads
       return T.sum(T.sqr(self.y_m[self.i] - self.y_data_flat.reshape(self.y_m.shape)[self.i])), known_grads
       #return T.sum(T.sum(T.sqr(self.z - (self.y.reshape((self.index.shape[0], self.index.shape[1], self.attrs['n_out']))[:self.z.shape[0]])), axis=2).flatten()[self.i]), known_grads
       #y_z = T.set_subtensor(T.zeros((self.index.shape[0],self.index.shape[1],self.attrs['n_out']), dtype='float32')[:self.z.shape[0]], self.z).flatten()
       #return T.sum(T.sqr(y_z[self.i] - self.y[self.i])), known_grads
       #return T.sum(T.sqr(self.y_m - self.y[:self.z.shape[0]*self.index.shape[1]]).flatten()[self.i]), known_grads
   else:
     assert False, "unknown loss: %s" % self.loss
Example #29
def max_pool_2d(input, ds, ignore_border=False, st=None, padding=(0, 0),
                mode='max'):
    """
    Takes as input a N-D tensor, where N >= 2. It downscales the input image by
    the specified factor, by keeping only the maximum value of non-overlapping
    patches of size (ds[0],ds[1])

    :type input: N-D theano tensor of input images.
    :param input: input images. Max pooling will be done over the 2 last
        dimensions.
    :type ds: tuple of length 2
    :param ds: factor by which to downscale (vertical ds, horizontal ds).
        (2,2) will halve the image in each dimension.
    :type ignore_border: bool
    :param ignore_border: When True, (5,5) input with ds=(2,2)
        will generate a (2,2) output. (3,3) otherwise.
    :type st: tuple of length 2
    :param st: stride size, which is the number of shifts
        over rows/cols to get the next pool region.
        if st is None, it is considered equal to ds
        (no overlap on pooling regions)
    :param padding: (pad_h, pad_w), pad zeros to extend beyond four borders
            of the images, pad_h is the size of the top and bottom margins,
            and pad_w is the size of the left and right margins.
    :type padding: tuple of two ints
    :param mode: 'max', 'average_inc_pad' or 'average_exc_pad'.
        Operation executed on each window.  `max` always excludes the padding
        in the computation. `average` gives you the choice to include or
        exclude it.
    :type mode: string
    """
    if input.ndim < 2:
        raise NotImplementedError('max_pool_2d requires a dimension >= 2')
    if input.ndim == 4:
        op = DownsampleFactorMax(ds, ignore_border, st=st, padding=padding,
                                 mode=mode)
        output = op(input)
        return output

    # extract image dimensions
    img_shape = input.shape[-2:]

    # count the number of "leading" dimensions, store as dmatrix
    batch_size = tensor.prod(input.shape[:-2])
    batch_size = tensor.shape_padright(batch_size, 1)

    # store as 4D tensor with shape: (batch_size,1,height,width)
    new_shape = tensor.cast(tensor.join(0, batch_size,
                                        tensor.as_tensor([1]),
                                        img_shape), 'int64')
    input_4D = tensor.reshape(input, new_shape, ndim=4)

    # downsample mini-batch of images
    op = DownsampleFactorMax(ds, ignore_border, st=st, padding=padding,
                             mode=mode)
    output = op(input_4D)

    # restore to original shape
    outshp = tensor.join(0, input.shape[:-2], output.shape[-2:])
    return tensor.reshape(output, outshp, ndim=input.ndim)
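A numpy sketch of the leading-dimension bookkeeping above: an N-D input is viewed as (prod(leading), 1, h, w), pooled, and restored; strided slicing stands in for the actual max pooling, since the shapes are the point here:

import numpy as np

x = np.random.rand(2, 3, 4, 6, 6)       # arbitrary 5-D input
h, w = x.shape[-2:]
x4 = x.reshape(-1, 1, h, w)             # (24, 1, 6, 6)
pooled = x4[:, :, ::2, ::2]             # stand-in for a 2x2 max pool
out = pooled.reshape(x.shape[:-2] + pooled.shape[-2:])
print(out.shape)                        # (2, 3, 4, 3, 3)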
Example #30
File: Layers.py  Project: chuckgu/RNN
 def one_hot(self,t, r=None):
     if r is None:
         r = T.max(t) + 1
         
     ranges = T.shape_padleft(T.arange(r), t.ndim)
     
     return T.cast(T.eq(ranges, T.shape_padright(t, 1)) ,dtype =theano.config.floatX)       
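A quick demonstration of the broadcast comparison in `one_hot`, assuming `T` is `theano.tensor`: a (1, r) row of class ids is compared against an (n, 1) column of targets:

import numpy as np
import theano
import theano.tensor as T

t = T.lvector('t')
ranges = T.shape_padleft(T.arange(4), t.ndim)  # (1, 4)
onehot = T.cast(T.eq(ranges, T.shape_padright(t, 1)), 'float32')
print(theano.function([t], onehot)(np.array([0, 2, 3])))
# [[1. 0. 0. 0.]
#  [0. 0. 1. 0.]
#  [0. 0. 0. 1.]]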
Example #31
    def log_likelihood(self, X, Y=None, n_samples=None):
        p_layers = self.p_layers
        q_layers = self.q_layers
        n_layers = len(p_layers)

        if n_samples is None:
            n_samples = self.n_samples

        batch_size = X.shape[0]

        # Get samples
        X = f_replicate_batch(X, n_samples)
        samples, log_p, log_q = self.sample_q(X, None)

        # Reshape and sum
        log_p_all = T.zeros((batch_size, n_samples))
        log_q_all = T.zeros((batch_size, n_samples))
        for l in xrange(n_layers):
            samples[l] = samples[l].reshape(
                (batch_size, n_samples, p_layers[l].n_X))
            log_q[l] = log_q[l].reshape((batch_size, n_samples))
            log_p[l] = log_p[l].reshape((batch_size, n_samples))
            log_p_all += log_p[l]  # aggregate all layers
            log_q_all += log_q[l]  # aggregate all layers

        # Approximate log P(X)
        log_px = f_logsumexp(log_p_all - log_q_all, axis=1) - T.log(n_samples)

        # Calculate sampling weights
        log_pq = (log_p_all - log_q_all - T.log(n_samples))
        w_norm = f_logsumexp(log_pq, axis=1)
        log_w = log_pq - T.shape_padright(w_norm)
        w = T.exp(log_w)

        # Calculate KL(P|Q), Hp, Hq
        KL = [None] * n_layers
        Hp = [None] * n_layers
        Hq = [None] * n_layers
        for l in xrange(n_layers):
            KL[l] = T.sum(w * (log_p[l] - log_q[l]), axis=1)
            Hp[l] = f_logsumexp(log_w + log_p[l], axis=1)
            Hq[l] = T.sum(w * log_q[l], axis=1)

        return log_px, w, log_p_all, log_q_all, KL, Hp, Hq
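A numpy sketch of the self-normalized importance weights above: subtracting the logsumexp (`w_norm`, padded right) makes each row of weights sum to one:

import numpy as np

log_pq = np.array([[0.1, -0.4, 0.2],
                   [1.0, 0.0, -1.0]])         # (batch, n_samples)
w_norm = np.logaddexp.reduce(log_pq, axis=1)  # f_logsumexp over samples
w = np.exp(log_pq - w_norm[:, None])          # shape_padright(w_norm)
print(w.sum(axis=1))                          # [1. 1.]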
Example #32
    def queue_transform(feature_strengths,
                        feature_vects,
                        return_strengths=False):
        """
        Process features according to a "fragmented queue", where each timestep
        gets a size-1 window onto a feature queue. Effectively,
            feature_strengths gives how much to push onto queue
            feature_vects gives what to push on
            pop weights are tied to feature_strengths
            output is a size-1 peek (without popping)

        Parameters:
            - feature_strengths: float32 tensor of shape (batch, push_timestep) in [0,1]
            - feature_vects: float32 tensor of shape (batch, push_timestep, feature_dim)

        Returns:
            - peek_vects: float32 tensor of shape (batch, timestep, feature_dim)
        """
        n_batch, n_time, n_feature = feature_vects.shape

        cum_sum_str = T.extra_ops.cumsum(feature_strengths, 1)

        # We will be working in (batch, timestep, push_timestep)
        # For each timestep, if we subtract out the sum of pushes before that timestep
        # and then cap to 0-1 we get the cumsums for just the features active in that
        # timestep
        timestep_adjustments = T.shape_padright(cum_sum_str -
                                                feature_strengths)
        push_time_cumsum = T.shape_padaxis(cum_sum_str, 1)
        relative_cumsum = push_time_cumsum - timestep_adjustments
        capped_cumsum = T.minimum(T.maximum(relative_cumsum, 0), 1)

        # Now we can recover the peek strengths by taking a diff
        shifted = T.concatenate(
            [T.zeros((n_batch, n_time, 1)), capped_cumsum[:, :, :-1]], 2)
        peek_strengths = capped_cumsum - shifted
        # Peek strengths is now (batch, timestep, push_timestep)

        result = T.batched_dot(peek_strengths, feature_vects)

        if return_strengths:
            return peek_strengths, result
        else:
            return result
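A numpy sketch of the cumulative-sum mechanics above for a single batch element (hypothetical push strengths): subtract the pre-timestep push total, cap to [0, 1], and diff along the push axis to recover per-push peek strengths:

import numpy as np

s = np.array([0.5, 0.7, 0.3])     # push strengths per timestep
cum = np.cumsum(s)
adj = (cum - s)[:, None]          # cumsum *before* each timestep
capped = np.clip(cum[None, :] - adj, 0.0, 1.0)  # (timestep, push_timestep)
peek = np.diff(np.concatenate([np.zeros((3, 1)), capped], axis=1))
print(peek)                       # row t: how much of each push is visible at step t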
Example #33
    def take_look(self,
                  sequence,
                  preprocessed_sequence=None,
                  mask=None,
                  **states):
        r"""Compute attention weights and produce glimpses.

        Parameters
        ----------
        sequence : :class:`~tensor.TensorVariable`
            The sequence, time is the 1-st dimension.
        preprocessed_sequence : :class:`~tensor.TensorVariable`
            The preprocessed sequence. If ``None``, is computed by calling
            :meth:`preprocess`.
        mask : :class:`~tensor.TensorVariable`
            A 0/1 mask specifying available data. 0 means that the
            corresponding sequence element is fake.
        \*\*states
            The states of the agent.

        Returns
        -------
        glimpses : theano variable
            Linear combinations of sequence elements with the attention
            weights.
        weights : theano variable
            The attention weights. The first dimension is batch, the second
            is time.

        """
        if preprocessed_sequence is None:  # avoid truth-testing a Theano variable
            preprocessed_sequence = self.preprocess(sequence)
        transformed_states = self.state_transformers.apply(return_dict=True,
                                                           **states)
        # Broadcasting of transformed states should be done automatically
        match_vectors = sum(transformed_states.values(), preprocessed_sequence)
        energies = self.energy_computer.apply(match_vectors).reshape(
            match_vectors.shape[:-1], ndim=match_vectors.ndim - 1)
        unormalized_weights = tensor.exp(energies)
        if mask is not None:
            unormalized_weights *= mask
        weights = unormalized_weights / unormalized_weights.sum(axis=0)
        glimpses = (tensor.shape_padright(weights) * sequence).sum(axis=0)
        return glimpses, weights.dimshuffle(1, 0)
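A numpy sketch of the glimpse computation above: pad the (time, batch) weights with a trailing axis so they broadcast against the (time, batch, dim) sequence, then sum over time:

import numpy as np

sequence = np.random.rand(5, 2, 3)  # (time, batch, dim)
weights = np.random.rand(5, 2)
weights /= weights.sum(axis=0)      # normalize over time
glimpses = (weights[..., None] * sequence).sum(axis=0)  # shape_padright(weights) * sequence
print(glimpses.shape)               # (2, 3)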
Example #34
def max_pool_2d(input, ds, ignore_border=False, st=None):
    """
    Takes as input a N-D tensor, where N >= 2. It downscales the input image by
    the specified factor, by keeping only the maximum value of non-overlapping
    patches of size (ds[0],ds[1])

    :type input: N-D theano tensor of input images.
    :param input: input images. Max pooling will be done over the 2 last
        dimensions.
    :type ds: tuple of length 2
    :param ds: factor by which to downscale (vertical ds, horizontal ds).
        (2,2) will halve the image in each dimension.
    :type ignore_border: bool
    :param ignore_border: When True, (5,5) input with ds=(2,2)
        will generate a (2,2) output. (3,3) otherwise.
    :type st: tuple of length 2
    :param st: stride size, which is the number of shifts
        over rows/cols to get the next pool region.
        if st is None, it is considered equal to ds
        (no overlap on pooling regions)

    """
    if input.ndim < 2:
        raise NotImplementedError('max_pool_2d requires a dimension >= 2')

    # extract image dimensions
    img_shape = input.shape[-2:]

    # count the number of "leading" dimensions, store as dmatrix
    batch_size = tensor.prod(input.shape[:-2])
    batch_size = tensor.shape_padright(batch_size, 1)

    # store as 4D tensor with shape: (batch_size,1,height,width)
    new_shape = tensor.cast(
        tensor.join(0, batch_size, tensor.as_tensor([1]), img_shape), 'int64')
    input_4D = tensor.reshape(input, new_shape, ndim=4)

    # downsample mini-batch of images
    op = DownsampleFactorMax(ds, ignore_border, st=st)
    output = op(input_4D)

    # restore to original shape
    outshp = tensor.join(0, input.shape[:-2], output.shape[-2:])
    return tensor.reshape(output, outshp, ndim=input.ndim)
Example #35
    def get_means_and_covs(self, X, X_embedded):
        """ Get the mean and the covariance for the distribution for the code z

        :param X:               (N x max(L)) matrix representing the text
        :param X_embedded:      (N x max(L) x E) tensor representing the embedded text

        :return:                variational mean and covariance for the latents given a sentence
        """

        # If x is less or equal than 0 then return 0, else 1 (used to filter out words)
        mask = T.switch(T.lt(X, 0), 0, 1)                                       # N x max(L)

        # Reshape the embedding of X adding a singleton dimension on the right
        X_embedded *= T.shape_padright(mask)                                    # mask padded to N x max(L) x 1, broadcasts over E

        means = get_output(self.mean_nn, X_embedded)                            # N x Z
        covs = get_output(self.cov_nn, X_embedded)                              # N x Z

        return means, covs
Example #36
    def build_NADE(self, v, W, V, b, c):
        a = T.shape_padright(v) * T.shape_padleft(W)
        a = a.dimshuffle(1, 0, 2)

        c_init = c
        if c.ndim == 1:
            c_init = T.dot(T.ones((v.shape[0], 1)), T.shape_padleft(c))

        (activations, s), updates = theano.scan(
            lambda V_i, a_i, partial_im1:
            (a_i + partial_im1, T.dot(V_i, T.nnet.sigmoid(partial_im1.T))),
            sequences=[V.T, a],
            outputs_info=[c_init, None])
        s = s.T + b
        y = T.nnet.sigmoid(s)

        cost = -v * T.log(y) - (1 - v) * T.log(1 - y)
        cost = cost.sum() / v.shape[0]
        return s, y, cost
Example #37
def my_pool_2d(input, ds, ignore_border=None, st=None, padding=(0, 0),
               mode='max'):
    """
    This function is a patch to the maxpool op of Theano:
    contrarily to current implementation of maxpool, the gradient is backpropagated
    to only one input of a given patch if several inputs have the same value. This is
    consistent with the CuDNN implementation (and therefore the op is replaced by the
    CuDNN version when possible).
    """

    if input.ndim < 2:
        raise NotImplementedError('pool_2d requires a dimension >= 2')

    if ignore_border is not None:
        # check that ignore_border is True if provided
        assert ignore_border
    ignore_border = True

    if input.ndim == 4:
        op = MyPool(ds, ignore_border, st=st, padding=padding, mode=mode)
        output = op(input)
        return output

    # extract image dimensions
    img_shape = input.shape[-2:]

    # count the number of "leading" dimensions, store as dmatrix
    batch_size = tensor.prod(input.shape[:-2])
    batch_size = tensor.shape_padright(batch_size, 1)

    # store as 4D tensor with shape: (batch_size,1,height,width)
    new_shape = tensor.cast(tensor.join(0, batch_size,
                                        tensor.as_tensor([1]),
                                        img_shape), 'int64')
    input_4D = tensor.reshape(input, new_shape, ndim=4)

    # downsample mini-batch of images
    op = MyPool(ds, ignore_border, st=st, padding=padding, mode=mode)
    output = op(input_4D)

    # restore to original shape
    outshp = tensor.join(0, input.shape[:-2], output.shape[-2:])
    return tensor.reshape(output, outshp, ndim=input.ndim)
Example #38
    def __init__(self,
                 rbm,
                 units,
                 dimensions,
                 shared_dimensions,
                 b,
                 name=None):
        super(SharedBiasParameters, self).__init__(rbm, [units], name=name)
        self.var = b
        self.variables = [self.var]
        self.u = units
        self.ud = dimensions
        self.sd = shared_dimensions
        self.nd = self.ud - self.sd

        self.terms[self.u] = lambda vmap: T.shape_padright(self.var, self.sd)

        self.energy_gradients[self.var] = lambda vmap: T.mean(
            vmap[self.u], axis=self._shared_axes(vmap))
Example #39
    def train(self, data):
        data = np.asarray(data, dtype=theano.config.floatX)
        val = T.vector('val')

        min_idx = self.min_dist(val)

        self.nhood_radius = self.radius * T.exp(-self.it / self.time_const)

        dists = self.in_neighbourhood(self.lattice[min_idx])

        in_nhood = dists < self.nhood_radius

        lr = self.start_lr * T.exp(-self.it / self.iterations)

        updates = [(self.W, self.W + self.compute_influence(dists) *
                    T.shape_padright(in_nhood, 1) * lr * (val - self.W))]

        epoch = theano.function(inputs=[val],
                                outputs=self.ret_w(),
                                updates=updates)

        update_iteration = theano.function(inputs=[],
                                           outputs=self.it,
                                           updates={self.it: self.it + 1})

        self.last_W = None

        for i in range(self.iterations):
            index = np.random.random_integers(0, len(data) - 1)
            self.last_W = epoch(data[index])

            #index += 1
            #if index >= len(data):
            #    index = 0

            self.it = update_iteration()

            print i

            # print an image every 100 epochs
            if (i + 1) % self.print_fr == 0:
                self.print_image(i)
Example #40
    def build_aligner(self):
        tgt_action_seq = ndim_itensor(3, 'tgt_action_seq')
        tgt_action_seq_type = ndim_itensor(3, 'tgt_action_seq_type')
        tgt_node_seq = ndim_itensor(2, 'tgt_node_seq')
        tgt_par_rule_seq = ndim_itensor(2, 'tgt_par_rule_seq')
        tgt_par_t_seq = ndim_itensor(2, 'tgt_par_t_seq')

        tgt_node_embed = self.node_embedding[tgt_node_seq]
        query_tokens = ndim_itensor(2, 'query_tokens')
        query_token_embed, query_token_embed_mask = self.query_embedding(
            query_tokens, mask_zero=True)
        batch_size = tgt_action_seq.shape[0]
        max_example_action_num = tgt_action_seq.shape[1]

        tgt_action_seq_embed = T.switch(T.shape_padright(tgt_action_seq[:, :, 0] > 0),
                                        self.rule_embedding_W[tgt_action_seq[:, :, 0]],
                                        self.vocab_embedding_W[tgt_action_seq[:, :, 1]])
        tgt_action_seq_embed_tm1 = tensor_right_shift(tgt_action_seq_embed)
        tgt_par_rule_embed = T.switch(tgt_par_rule_seq[:, :, None] < 0,
                                      T.alloc(0., 1, config.rule_embed_dim),
                                      self.rule_embedding_W[tgt_par_rule_seq])

        if not config.frontier_node_type_feed:
            tgt_node_embed *= 0.
        if not config.parent_action_feed:
            tgt_par_rule_embed *= 0.

        decoder_input = T.concatenate(
            [tgt_action_seq_embed_tm1, tgt_node_embed, tgt_par_rule_embed], axis=-1)
        query_embed = self.query_encoder_lstm(query_token_embed, mask=query_token_embed_mask, dropout=0, srng=self.srng)

        tgt_action_seq_mask = T.any(tgt_action_seq_type, axis=-1)

        alignments = self.decoder_lstm.align(decoder_input, context=query_embed,
                                             context_mask=query_token_embed_mask,
                                             mask=tgt_action_seq_mask,
                                             parent_t_seq=tgt_par_t_seq,
                                             srng=self.srng)

        alignment_inputs = [query_tokens, tgt_action_seq, tgt_action_seq_type,
                            tgt_node_seq, tgt_par_rule_seq, tgt_par_t_seq]
        self.align = theano.function(alignment_inputs, [alignments])
Example #41
File: mixture.py  Project: xiaoxi0920/pymc3
    def __init__(self, w, comp_dists, mixture_axis=-1, *args, **kwargs):
        self.w = tt.as_tensor_variable(w)
        if not isinstance(comp_dists, Distribution):
            raise TypeError(
                "The MixtureSameFamily distribution only accepts Distribution "
                f"instances as its components. Got {type(comp_dists)} instead."
            )
        self.comp_dists = comp_dists
        if mixture_axis < 0:
            mixture_axis = len(comp_dists.shape) + mixture_axis
            if mixture_axis < 0:
                raise ValueError(
                    "`mixture_axis` is supposed to be in shape of components' distribution. "
                    f"Got {mixture_axis + len(comp_dists.shape)} axis instead out of the bounds."
                )
        comp_shape = to_tuple(comp_dists.shape)
        self.shape = comp_shape[:mixture_axis] + comp_shape[mixture_axis + 1:]
        self.mixture_axis = mixture_axis
        kwargs.setdefault("dtype", self.comp_dists.dtype)

        # Compute the mode so we don't always have to pass a testval
        defaults = kwargs.pop("defaults", [])
        event_shape = self.comp_dists.shape[mixture_axis + 1:]
        _w = tt.shape_padleft(
            tt.shape_padright(w, len(event_shape)),
            len(self.comp_dists.shape) - w.ndim - len(event_shape),
        )
        mode = take_along_axis(
            self.comp_dists.mode,
            tt.argmax(_w, keepdims=True),
            axis=mixture_axis,
        )
        self.mode = mode[(..., 0) + (slice(None), ) * len(event_shape)]

        if not all_discrete(comp_dists):
            mean = tt.as_tensor_variable(self.comp_dists.mean)
            self.mean = (_w * mean).sum(axis=mixture_axis)
            if "mean" not in defaults:
                defaults.append("mean")
        defaults.append("mode")

        super().__init__(defaults=defaults, *args, **kwargs)
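To see what the `shape_padleft`/`shape_padright` combination does to `w`, here is a NumPy sketch with made-up shapes (batch 3, four mixture components on `mixture_axis=1`, event shape `(2,)`); the sizes are illustrative only:

import numpy as np

w = np.random.dirichlet(np.ones(4))        # mixture weights, shape (4,)
comp_mean = np.random.rand(3, 4, 2)        # comp_dists.mean: (batch, mix, event)

# shape_padright(w, 1) -> (4, 1); shape_padleft(..., 1) -> (1, 4, 1).
# The padded weights broadcast against (3, 4, 2), so summing over the
# mixture axis gives the mixture mean of shape (3, 2).
_w = w[None, :, None]
mixture_mean = (_w * comp_mean).sum(axis=1)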
Example #42
0
File: ops.py Project: ritheshkumar95/TTS
def AttnDecStep(name, n_input, input_dim, hidden_dim, ctx_dim, ctx, x_t, prev_state, mode='train',weightnorm=True):
    # h_tm1 = prev_state[:,:hidden_dim]
    # c_tm1 = prev_state[:,hidden_dim:]
    h_tm1 = prev_state
    if mode=='open-loop':
        x_t = lib.ops.Embedding(
            'NMT.Embedding_Phons',
            n_input,
            input_dim,
            x_t
            )

    tiled_h_tm1 = T.tile(h_tm1[:,None,:],[1,ctx.shape[1],1])
    e_vec = T.nnet.relu(lib.ops.Linear(
        'NMT.Attention.MLP1',
        T.concatenate([tiled_h_tm1,ctx],-1),
        hidden_dim+ctx_dim,
        hidden_dim
    ))
    e_vec = T.nnet.softmax(lib.ops.Linear(
        'NMT.Attention.MLP2',
        e_vec,
        hidden_dim,
        1
    )[:,:,0]) # (B, seq_len)

    c_t = T.sum(T.shape_padright(e_vec)*ctx,axis=1)
    input_to_rnn = T.concatenate([x_t,c_t],-1)
    # state_t = LSTMStep(name,False, input_dim+ctx_dim, hidden_dim, input_to_rnn, prev_state)
    mask_t = T.ones((x_t.shape[0],)).astype(theano.config.floatX)
    state_t = GRUStep(name, input_dim+ctx_dim, hidden_dim, mask_t, input_to_rnn, h_tm1)
    if mode=='open-loop':
        logits = T.nnet.softmax(lib.ops.Linear(
            name+'.Output.MLP.1',
            T.concatenate([x_t,state_t[:,:hidden_dim],c_t],-1),
            input_dim+hidden_dim+ctx_dim,
            n_input
        ))
        idxs = T.argmax(logits,axis=-1).astype('int32')
        return idxs,state_t
    else:
        return state_t,c_t
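The `shape_padright` in `c_t` lifts the attention weights from `(B, seq_len)` to `(B, seq_len, 1)` so they broadcast against the `(B, seq_len, ctx_dim)` context. A NumPy sketch of the same reduction, with illustrative sizes:

import numpy as np

B, seq_len, ctx_dim = 2, 5, 8
e_vec = np.random.rand(B, seq_len)
e_vec /= e_vec.sum(axis=1, keepdims=True)  # rows sum to 1, like softmax output
ctx = np.random.rand(B, seq_len, ctx_dim)

# T.shape_padright(e_vec) corresponds to e_vec[:, :, None] here
c_t = (e_vec[:, :, None] * ctx).sum(axis=1)  # (B, ctx_dim): weighted context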
Example #43
0
    def __init__(self, n, p, *args, **kwargs):
        super(Multinomial, self).__init__(*args, **kwargs)

        p = p / tt.sum(p, axis=-1, keepdims=True)

        if len(self.shape) == 2:
            try:
                assert n.shape == (self.shape[0], )
            except AttributeError:
                # this occurs when n is a scalar Python int or float
                n *= tt.ones(self.shape[0])

            self.n = tt.shape_padright(n)
            self.p = p if p.ndim == 2 else tt.shape_padleft(p)
        else:
            self.n = n
            self.p = p

        self.mean = self.n * self.p
        self.mode = tt.cast(tt.round(self.mean), 'int32')
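In the 2D branch, `shape_padright(n)` turns the per-row trial counts `(batch,)` into `(batch, 1)` so that `mean = n * p` broadcasts row-wise. A NumPy illustration with made-up numbers:

import numpy as np

n = np.array([10., 20.])                   # trial counts, one per row
p = np.array([[0.2, 0.3, 0.5],
              [0.1, 0.6, 0.3]])            # normalized probabilities, (batch, k)

mean = n[:, None] * p                      # shape_padright(n) -> (batch, 1)
assert np.allclose(mean.sum(axis=1), n)    # each row's mean counts sum to n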
Example #44
0
    def __init__(
        self, points, values, check_sorted=True, bounds_error=False, nout=-1
    ):
        self.ndim = len(points)
        self.nout = int(nout)

        self.points = [theano.shared(p) for p in points]
        self.values = theano.shared(values)
        if self.values.ndim == self.ndim:
            self.values = tt.shape_padright(self.values)

        self.check_sorted = bool(check_sorted)
        self.bounds_error = bool(bounds_error)

        self.interp_op = RegularGridOp(
            self.ndim,
            nout=self.nout,
            check_sorted=self.check_sorted,
            bounds_error=self.bounds_error,
        )
Example #45
0
def grad_wrt_list(cost, wrt_list):
    """
    Compute gradient of cost wrt the variables in wrt_list.
    Return a concatenated vector of the results
    """
    if wrt_list == []:
        return T.constant(0.), []

    g_list = T.grad(cost, wrt_list)

    for (n, g) in enumerate(g_list):
        if g.ndim < 1:
            g_list[n] = T.shape_padright(g, n_ones=1)
        elif g.ndim > 1:
            # gradients wrt matrices/higher-rank tensors are flattened to vectors
            g_list[n] = T.flatten(g)

    g_vec = T.concatenate(g_list)

    return g_vec, g_list
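A hypothetical usage sketch: building a scalar cost over a vector and a scalar, then compiling the concatenated gradient. The variable names are made up for illustration.

import theano
import theano.tensor as T

w = T.vector('w')
b = T.scalar('b')
cost = T.sum(w ** 2) + b ** 2

g_vec, g_list = grad_wrt_list(cost, [w, b])
f = theano.function([w, b], g_vec)
# f([1., 2.], 3.) -> [2., 4., 6.]: dcost/dw flattened, then dcost/db padded to 1-d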
Example #46
0
def Arodz(X, Y):
    """Takes in two sample sets, one from each class, and
    returns the MAP estimates of w and b
    """
    numberOfFeatures = len(X[0])
    Y = np.reshape(Y, (len(Y), 1))

    # instantiate an empty PyMC3 model
    basic_model = pm.Model()

    # fill the model with details:
    with basic_model:
        mu_prior_cov = 100*np.eye(numberOfFeatures)
        mu_prior_mu = np.zeros((numberOfFeatures,))
        
        # Priors for w,b (Gaussian priors), centered at 0, with very large std.dev.
        w = pm.MvNormal('estimated_w', mu=mu_prior_mu, cov=mu_prior_cov, shape=numberOfFeatures)
        b  = pm.Normal('estimated_b',0,100)

        # calculate u=w^Tx+b
        ww=pm.Deterministic('my_w_as_mx',T.shape_padright(w,1))
        
        # here w, b are unknown to be estimated from data
        # X is the known data matrix [samples x features]
        u = pm.Deterministic('my_u',T.dot(X,ww) + b)
        # u = pm.Deterministic('my_u',X*w + b);
        
        # P(+1|x)=a(u) #see slides for def. of a(u)
        prob = pm.Deterministic('my_prob',1.0 / (1.0 + T.exp(-1.0*u)))
        
        # class +1 comes from a Bernoulli distribution with probability "prob" for +1, and 1-prob for class 0
        # here Y is the known vector of classes
        # prob comes (indirectly) from the estimates of w, b and the data X
        Y_obs=pm.Bernoulli('Y_obs',p=prob,observed = Y)
    # done with setting up the model

    # now perform maximum likelihood (actually, maximum a posteriori (MAP), since we have priors) estimation
    # map_estimate1 is a dictionary: "parameter name" -> "its estimated value"
    map_estimate1 = pm.find_MAP(model=basic_model)

    return map_estimate1['estimated_w'], map_estimate1['estimated_b']
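A hypothetical call with toy data (two Gaussian blobs, one per class); the sizes and seed are made up:

import numpy as np

rng = np.random.RandomState(0)
X = np.vstack([rng.normal(-1.0, 1.0, size=(50, 2)),   # class 0 samples
               rng.normal(+1.0, 1.0, size=(50, 2))])  # class 1 samples
Y = np.array([0] * 50 + [1] * 50)

w_map, b_map = Arodz(X, Y)   # MAP estimates of the logistic regression parameters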
Example #47
0
File: mlp.py Project: surban/mlutils
    def __init__(self, loss, n_units, transfer_funcs):
        n_layers = len(n_units)

        print "===== MLP ========="
        print "Number of layers:  ", n_layers
        print "Loss:              ", loss
        print "Number of units:   ", n_units
        print "Transfer function: ", transfer_funcs
        print "==================="

        # create ParameterSet
        vars = {}
        for lyr in range(n_layers):
            if lyr != 0:
                vars["weights_%d_to_%d" % (lyr - 1, lyr)] = (n_units[lyr], n_units[lyr - 1])
                vars["bias_%d" % lyr] = (n_units[lyr],)
            vars.update(self.transfer_func_parameter_shape(lyr, transfer_funcs[lyr], n_units[lyr]))
        self.ps = ParameterSet(**vars)

        # create graph
        v_input = T.fmatrix('v_input')      # v_input[unit, smpl]
        unit_val = [None for _ in range(n_layers)]
        for lyr in range(n_layers):
            if lyr == 0:
                unit_act = v_input
            else:
                unit_act = T.dot(self.weights(lyr - 1, lyr), unit_val[lyr - 1]) + T.shape_padright(self.bias(lyr))
            unit_val[lyr] = self.make_transfer_func(lyr, transfer_funcs[lyr])(unit_act)
        output = unit_val[-1]
        self.f_predict = function([self.ps.flat, v_input], output, name='f_predict')

        # calculate loss
        if loss is not None:
            v_target = T.fmatrix('v_target')    # v_target[unit, smpl]
            fit_smpl_loss = self.fit_loss(loss, transfer_funcs[-1], v_target, output)
            fit_loss = T.mean(fit_smpl_loss)
            loss = fit_loss

            dloss = T.grad(loss, self.ps.flat)
            self.f_loss = function([self.ps.flat, v_input, v_target], loss, name='f_loss')
            self.f_loss_grad = function([self.ps.flat, v_input, v_target], dloss, name='f_loss_grad')
Example #48
0
def pool_1d_Lasagne(x, axis=1, mode='max'):
    """
    Lasagne requires x is 3D, and pooling is done on the last dimension
    :param x:
    :param axis:
    :return:
    """
    input_4d = tensor.shape_padright(x, 1)
    if axis == 1:
        input_4d = input_4d.dimshuffle((0, 2, 1, 3))
    pooled = pool_2d(
        input_4d,
        ws=(2, 1),
        stride=(2, 1),
        ignore_border=True,
        pad=(0, 0),
        mode=mode,
    )
    if axis == 1:  # [DV] added to support the 'axis' parameter
        pooled = pooled.dimshuffle((0, 2, 1, 3))
    return pooled[:, :, :, 0]
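A hypothetical usage sketch, assuming the function above is in scope; pooling along `axis=1` halves that axis (window 2, stride 2):

import numpy as np
import theano
from theano import tensor

x = tensor.tensor3('x')                     # (batch, time, features)
f = theano.function([x], pool_1d_Lasagne(x, axis=1, mode='max'))

out = f(np.arange(24, dtype=theano.config.floatX).reshape(2, 6, 2))
# out.shape == (2, 3, 2): the time axis is halved, features untouched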
Example #49
0
    def compute_weighted_averages(self, weights, attended):
        """Compute weighted averages of the attended sequence vectors.

        Parameters
        ----------
        weights : :class:`~theano.Variable`
            The weights. The shape must be equal to the attended shape
            without the last dimension.
        attended : :class:`~theano.Variable`
            The attended. The index in the sequence must be the first
            dimension.

        Returns
        -------
        weighted_averages : :class:`~theano.Variable`
            The weighted averages of the attended elements. The shape
            is equal to the attended shape with the first dimension
            dropped.

        """
        return (tensor.shape_padright(weights) * attended).sum(axis=0)
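A NumPy sketch of this reduction, using the shapes from the docstring (`attended` is `(seq_len, batch, dim)`, `weights` is `(seq_len, batch)`); the sizes are illustrative:

import numpy as np

seq_len, batch, dim = 4, 2, 3
attended = np.random.rand(seq_len, batch, dim)
weights = np.random.rand(seq_len, batch)
weights /= weights.sum(axis=0, keepdims=True)   # normalize over the sequence

# shape_padright(weights) -> (seq_len, batch, 1); broadcasting and the
# sum over axis 0 drop the leading sequence dimension.
averages = (weights[:, :, None] * attended).sum(axis=0)   # (batch, dim)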
Example #50
0
    def concatenate_basic(self, query_token_embed, query_tokens_phrase,
                          query_tokens_pos, query_tokens_canon_id):
        transform = lambda tokens: T.shape_padright(tokens)

        # concatenate query_token_embed with query_tokens_phrase and query_tokens_pos,
        # essentially expanding the embedding to incorporate the new data
        if config.include_cid:
            return T.concatenate([
                query_token_embed,
                transform(query_tokens_phrase),
                transform(query_tokens_pos),
                transform(query_tokens_canon_id)
            ],
                                 axis=2)
        else:
            return T.concatenate([
                query_token_embed,
                transform(query_tokens_phrase),
                transform(query_tokens_pos)
            ],
                                 axis=2)
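The `transform` lambda lifts each per-token feature from `(batch, len)` to `(batch, len, 1)` so it can be concatenated onto the embedding's last axis. A NumPy shape sketch with made-up sizes:

import numpy as np

batch, length, embed = 2, 5, 8
query_token_embed = np.random.rand(batch, length, embed)
phrase = np.random.rand(batch, length)
pos = np.random.rand(batch, length)

combined = np.concatenate(
    [query_token_embed, phrase[:, :, None], pos[:, :, None]], axis=2)
# combined.shape == (batch, length, embed + 2)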
Example #51
0
def conv2d_same(input, filters, input_shape=(None, None, None, None), filter_shape=(None, None, None, None),
                padding=None):
    assert input.ndim == 4 and filters.ndim == 4
    assert (4 == len(input_shape)) and (4 == len(filter_shape))
    assert (1 == filter_shape[2] % 2) and (1 == filter_shape[3] % 2)
    if (tuple(input_shape[2:4]) == (1, 1) and tuple(filter_shape[2:4]) == (1, 1)) or (
                    tuple(filter_shape[2:4]) == (1, 1) and theano.config.device == "cpu"):
        return tensor4dot(input, filters)
    else:
        new_row_begin = filters.shape[2] // 2
        new_row_end = input.shape[2] + filters.shape[2] // 2
        new_col_begin = filters.shape[3] // 2
        new_col_end = input.shape[3] + filters.shape[3] // 2
        if padding is not None:
            assert 1 == padding.ndim
            padded_input = TT.ones((
                input.shape[0], input.shape[1], input.shape[2] + filters.shape[2] - 1,
                input.shape[3] + filters.shape[3] - 1)).astype(theano.config.floatX)
            padded_input = TT.set_subtensor(padded_input[:, :, new_row_begin:new_row_end, new_col_begin:new_col_end],
                                            numpy_floatX(0))
            padding = TT.shape_padleft(TT.shape_padright(padding, 2), 1)
            padded_input = padding * padded_input
        else:
            padded_input = TT.zeros((
                input.shape[0], input.shape[1], input.shape[2] + filters.shape[2] - 1,
                input.shape[3] + filters.shape[3] - 1)).astype(theano.config.floatX)
        padded_input = TT.inc_subtensor(padded_input[:, :, new_row_begin:new_row_end, new_col_begin:new_col_end], input)
        new_input_shape = [None, None, None, None]
        if input_shape[0] is not None:
            new_input_shape[0] = input_shape[0]
        if input_shape[1] is not None:
            new_input_shape[1] = input_shape[1]
        if input_shape[2] is not None and filter_shape[2] is not None:
            new_input_shape[2] = input_shape[2] + filter_shape[2] - 1
        if input_shape[3] is not None and filter_shape[3] is not None:
            new_input_shape[3] = input_shape[3] + filter_shape[3] - 1
        ret = TT.nnet.conv2d(input=padded_input, filters=filters, border_mode='valid',
                             image_shape=tuple(new_input_shape), filter_shape=filter_shape)

        return ret
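The padding arithmetic above keeps the spatial size unchanged: padding each spatial dimension by `k - 1` and then applying a 'valid' convolution with a `k`-wide (odd) filter returns the original size. A quick check of the arithmetic:

# For any input height H and odd filter height k:
H, k = 28, 5
padded = H + k - 1          # padded_input height
out = padded - k + 1        # 'valid' convolution output height
assert out == H             # same-size output, as intended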
Example #52
0
            def theano_code(y, x, xs, ys, zs, xo, yo, ro):

                # Get the z coord
                z = tt.sqrt(1 - x**2 - y**2)

                # Compute the intensity
                pT = self.map_ref.ops.pT(x, y, z)

                # Dot the polynomial into the basis
                intensity = tt.shape_padright(tt.dot(pT, self._A1y))

                # Weight the intensity by the illumination
                xyz = tt.concatenate((
                    tt.reshape(x, [1, -1]),
                    tt.reshape(y, [1, -1]),
                    tt.reshape(z, [1, -1]),
                ))
                I = self.map_ref.ops.compute_illumination_point_source(
                    xyz,
                    xs,
                    ys,
                    zs,
                    tt.as_tensor_variable(0.0),
                    tt.as_tensor_variable(np.array(False)),
                )
                intensity = tt.switch(tt.isnan(intensity), intensity,
                                      intensity * I)[0, 0]

                # Check if the point is visible
                result = ifelse(
                    ((x - xo)**2 + (y - yo)**2 < ro**2)[0],
                    tt.as_tensor_variable(0.0).astype(tt.config.floatX),
                    ifelse(
                        (x**2 + y**2 > 1)[0],
                        tt.as_tensor_variable(0.0).astype(tt.config.floatX),
                        intensity,
                    ),
                )
                return result
Example #53
0
    def step(self, ipt, state, state_strength, dropout_masks=None):
        """
        Perform a single step of the network

        Params:
            ipt: The current input. Should be an int tensor of shape (n_batch, self.input_width)
            state: The previous state. Should be a float tensor of shape (n_batch, self.output_width)
            state_strength: Strength of the previous state. Should be a float tensor of shape
                (n_batch)
            dropout_masks: Masks from get_dropout_masks

        Returns: The next output state, and the next output strength
        """
        if dropout_masks is not None:
            ipt_masks, state_masks = dropout_masks
            ipt = ipt * ipt_masks
            state = state * state_masks

        obs_state = state * T.shape_padright(state_strength)
        cat_ipt_state = T.concatenate([ipt, obs_state], 1)
        reset = do_layer(T.nnet.sigmoid, cat_ipt_state, self._reset_W,
                         self._reset_b)
        update = do_layer(T.nnet.sigmoid, cat_ipt_state, self._update_W,
                          self._update_b)
        update_state = update[:, :-1]
        update_strength = update[:, -1]

        cat_reset_ipt_state = T.concatenate([ipt, (reset * obs_state)], 1)
        candidate_act = do_layer(T.tanh, cat_reset_ipt_state,
                                 self._activation_W, self._activation_b)
        candidate_strength = do_layer(
            T.nnet.sigmoid, cat_reset_ipt_state, self._strength_W,
            self._strength_b).reshape(state_strength.shape)

        newstate = update_state * state + (1 - update_state) * candidate_act
        newstrength = update_strength * state_strength + (
            1 - update_strength) * candidate_strength

        return newstate, newstrength
Example #54
0
    def get_relative_position(self, t, light_delay=False):
        """The planets' positions relative to the star

        Args:
            t: The times where the position should be evaluated.
            light_delay: Whether to account for light travel time
                (not implemented for simple orbits).

        Returns:
            The components of the position vector at ``t`` in units of
            ``R_sun``.

        """
        if light_delay:
            raise NotImplementedError(
                "Light travel time delay is not implemented for simple orbits"
            )
        dt = tt.mod(tt.shape_padright(t) - self._ref_time, self.period)
        dt -= self._half_period
        x = tt.squeeze(self.speed * dt)
        y = tt.squeeze(self._b_norm + tt.zeros_like(dt))
        m = tt.abs_(dt) < 0.5 * self.duration
        z = tt.squeeze(m * 1.0 - (~m) * 1.0)
        return x, y, z
Example #55
0
    def __init__(self, theta, alpha, kappa, gamma=0, sigma=1, *args, **kwargs):
        super(GRMLike, self).__init__(*args, **kwargs)

        self.param_list = []
        for var in [theta, alpha, kappa, gamma, sigma]:
            self.param_list.append(tt.as_tensor_variable(floatX(var)))

        par_names = ['theta', 'alpha', 'kappa', 'gamma', 'sigma']
        self.params = {nm: var for nm, var in zip(par_names, self.param_list)}

        self.cprobst, self.probst = self.__init_probs()

        # Set number of categories
        self.k = tt.shape(self.probst)[-1]

        # Compute mode for each response category
        self.mode = tt.argmax(self.probst, axis=-1)

        # Numpy fancy indexing to allow observed data to index
        # probability tensor
        self.index = (tt.shape_padright(tt.arange(self.probst.shape[0])),
                      tt.shape_padleft(tt.arange(self.probst.shape[1])))
Example #56
0
    def __call__(self, x):
        for k1, k2 in self.res_kernels[0]:
            x += T.nnet.conv2d(relu(T.nnet.conv2d(x, k1, border_mode='half')),
                               k2,
                               border_mode='half')
        for i, (k, b) in enumerate(zip(self.layer_kernels, self.layer_biases)):
            if i > 0: x = relu(x)
            if self.upscale > 1:
                x = x.repeat(self.upscale,
                             axis=2)[:, :, :-1].repeat(self.upscale,
                                                       axis=3)[:, :, :, :-1]
            x = T.nnet.conv2d(x,
                              k,
                              subsample=(self.downscale, self.downscale),
                              border_mode='half') + T.shape_padright(b, 2)
            for k1, k2 in self.res_kernels[i + 1]:
                x += T.nnet.conv2d(relu(
                    T.nnet.conv2d(x, k1, border_mode='half')),
                                   k2,
                                   border_mode='half')
        if self.output == 'linear': return x
        elif self.output == 'relu': return relu(x)
Example #57
0
def get_light_curve(time, tpeaks, fwhms, ampls, texp=None, oversample=7):
    time = time.astype("float64")
    time = tt.as_tensor_variable(time)

    if texp is None:
        tgrid = time
    if texp is not None:
        # taking this oversample code from
        # https://github.com/dfm/exoplanet
        # and https://github.com/lkreidberg/batman
        oversample = int(oversample)
        oversample += 1 - oversample % 2
        dt = np.linspace(-texp / 2.0, texp / 2.0, oversample)
        tgrid = tt.shape_padright(time) + dt

    multiflare_lc = multiflaremodel(tgrid, tpeaks, fwhms, ampls)

    if texp is not None:
        multiflare_lc = tt.mean(
            tt.reshape(multiflare_lc, (-1, oversample)), axis=1
        )
    return multiflare_lc
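A NumPy sketch of the exposure-time trick: `shape_padright(time) + dt` builds a `(n_times, oversample)` grid, the model is evaluated on the fine grid, and averaging each row integrates over the exposure. The sine stands in for `multiflaremodel` purely for illustration.

import numpy as np

time = np.array([0.0, 1.0, 2.0])
texp, oversample = 0.1, 7
dt = np.linspace(-texp / 2.0, texp / 2.0, oversample)

tgrid = time[:, None] + dt                 # shape_padright(time) + dt
flux_fine = np.sin(tgrid)                  # stand-in for the flare model
flux = flux_fine.reshape(-1, oversample).mean(axis=1)   # one value per exposure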
Example #58
0
    def __init__(self, input, image_shape, pool_size, sparse_count):

        # max pooling is not implemented as of now; we have to make do with average pooling
        oneZeros = numpy.concatenate(([1], numpy.zeros(sparse_count)))
        x = numpy.insert(numpy.tile(oneZeros, pool_size[0] - 1),
                         (pool_size[0] - 1) * (len(oneZeros)), 1)
        y = numpy.insert(numpy.tile(oneZeros, pool_size[1] - 1),
                         (pool_size[1] - 1) * (len(oneZeros)), 1)
        z = numpy.insert(numpy.tile(oneZeros, pool_size[2] - 1),
                         (pool_size[2] - 1) * (len(oneZeros)), 1)
        mask = numpy.outer(numpy.outer(x, y),
                           z).reshape(len(x), len(y), len(z))
        mask = numpy.ones((1, 1, len(x), len(y), len(z))) * mask
        self.pool_mask = mask.astype(
            theano.config.floatX) / numpy.prod(pool_size)

        frame_shape = input.shape[-3:]
        batch_size = T.shape_padright(T.prod(input.shape[:-3]), 1)
        new_shape = T.cast(
            T.join(0, batch_size, T.as_tensor([
                1,
            ]), frame_shape), 'int32')
        filter_shape = (1, 1, len(x), len(y), len(z))
        input_5d = T.reshape(input, new_shape, ndim=5)
        image_shape = (image_shape[0] * image_shape[1], 1, image_shape[2],
                       image_shape[3], image_shape[4])
        avg_out = conv3d(
            signals=input_5d.dimshuffle([0, 2, 1, 3, 4]),
            filters=self.pool_mask.transpose(0, 2, 1, 3, 4),
            signals_shape=[image_shape[i] for i in [0, 2, 1, 3, 4]],
            filters_shape=[filter_shape[i] for i in [0, 2, 1, 3, 4]],
            border_mode='valid').dimshuffle([0, 2, 1, 3, 4])
        outshp = T.join(0, input.shape[:-3], avg_out.shape[-3:])
        avg_out = T.reshape(avg_out, outshp, ndim=5)

        self.outputlen = (image_shape[2] - len(x) + 1,
                          image_shape[3] - len(y) + 1,
                          image_shape[4] - len(z) + 1)
        self.output = avg_out
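The mask construction above builds a dilated ("sparse") averaging window: `oneZeros` is a tap followed by `sparse_count` holes. A small worked example of the 1-D profile for `pool_size=(2, 2, 2)` and `sparse_count=1`:

import numpy

oneZeros = numpy.concatenate(([1], numpy.zeros(1)))   # [1., 0.]
x = numpy.insert(numpy.tile(oneZeros, 1), 2, 1)       # [1., 0., 1.]
# Two taps separated by one hole: a window of extent 3 that averages
# every other voxel, i.e. stride sparse_count + 1 between taps.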
Example #59
0
def _apply_index(data, indices, axis=0):
    """
    Indexes data along a single axis.

    indices is a tensor of indices shaped like data minus the given axis.
    The result is a slice of data removing the given axis: for each entry
    of the other dimensions, the given index for that axis is used to select
    the single item.

    _apply_index can be used to dereference the tensor search results
    returned from tensor.argmax().
    """
    ndim = data.type.ndim
    shape = data.shape
    if indices.type.ndim < ndim - 1:
        indices = tensor.shape_padright(indices,
                                        n_ones=ndim - indices.type.ndim - 1)
    return data[tuple(
        indices if a == axis else
        _axis_count(shape, a, ndim -
                    1) if a < axis else _axis_count(shape, a - 1, ndim - 1)
        for a in range(ndim))]
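A hypothetical usage sketch, pairing `_apply_index` with `argmax` as the docstring suggests (`_axis_count` is a companion helper assumed to be defined alongside):

import numpy as np
import theano
from theano import tensor

data = tensor.matrix('data')
best = tensor.argmax(data, axis=1)          # one column index per row
picked = _apply_index(data, best, axis=1)   # the max value of each row

f = theano.function([data], picked)
# f([[1., 9.], [7., 3.]]) -> [9., 7.]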
Example #60
0
    def __call__(self, target, context, mask=None):
        target = target * T.cast(T.shape_padright(mask), 'float32')
        padded_mask = self.get_padded_shuffled_mask(mask, pad=1)
        # target = theano.printing.Print('X::' + self.name)(target)
        X_shifted = T.concatenate([
            alloc_zeros_matrix(target.shape[0], 1, self.input_dim),
            target[:, 0:-1, :]
        ],
                                  axis=-2)

        # X = theano.printing.Print('X::' + self.name)(X)
        # X = T.zeros_like(target)
        # T.set_subtensor(X[:, 1:, :], target[:, 0:-1, :])

        X = X_shifted.dimshuffle((1, 0, 2))

        ctx_step = context.dimshuffle(('x', 0, 1))
        x_z = T.dot(X, self.W_z) + T.dot(ctx_step, self.C_z) + self.b_z
        x_r = T.dot(X, self.W_r) + T.dot(ctx_step, self.C_r) + self.b_r
        x_h = T.dot(X, self.W_h) + T.dot(ctx_step, self.C_h) + self.b_h

        h, updates = theano.scan(self._step,
                                 sequences=[x_z, x_r, x_h, padded_mask],
                                 outputs_info=T.unbroadcast(
                                     alloc_zeros_matrix(
                                         X.shape[1], self.hidden_dim), 1),
                                 non_sequences=[self.U_z, self.U_r, self.U_h])

        # (batch_size, max_token_len, hidden_dim)
        h = h.dimshuffle((1, 0, 2))

        # (batch_size, max_token_len, vocab_size)
        predicts = T.dot(h, self.U_y) + T.dot(context.dimshuffle(
            (0, 'x', 1)), self.C_y) + self.b_y  # + T.dot(X_shifted, self.W_y)

        predicts_flatten = predicts.reshape((-1, predicts.shape[2]))
        return T.nnet.softmax(predicts_flatten).reshape(
            (predicts.shape[0], predicts.shape[1], predicts.shape[2]))