Exemple #1
def _span_sums(stt, end, p_lens, max_p_len, batch_size, dim, max_ans_len):
  """Build start+end sums and validity masks for every candidate span.

  Every start row is paired with the end rows lying at offsets
  0 .. max_ans_len-1 from it. `end` is zero-padded with max_ans_len-1 rows
  so that offsets running past the last row still index a valid row.

  Shapes, as annotated by the original author:
    stt, end: (max_p_len, batch_size, dim)
    p_lens:   (batch_size,)

  Returns:
    sums:  (batch_size, max_p_len*max_ans_len, dim)
    masks: (batch_size, max_p_len*max_ans_len), the result of cast_floatX
           applied to "end index < p_lens".
  """
  n_spans = max_p_len * max_ans_len

  # End index of the span (start, k) is start + k.
  ans_offsets = tt.shape_padleft(tt.arange(max_ans_len))        # (1, max_ans_len)
  start_positions = tt.shape_padright(tt.arange(max_p_len))     # (max_p_len, 1)
  flat_end_idxs = (ans_offsets + start_positions).flatten()     # (n_spans,)

  # Gather, for each start, its max_ans_len candidate end vectors.
  zero_tail = tt.zeros((max_ans_len-1, batch_size, dim))
  padded_end = tt.concatenate([end, zero_tail], axis=0)
  gathered_end = padded_end[flat_end_idxs].reshape(
    (max_p_len, max_ans_len, batch_size, dim))

  # Broadcast each start vector over its candidate ends, then go batch-major.
  sums = stt.dimshuffle((0,'x',1,2)) + gathered_end
  sums = sums.dimshuffle((2,0,1,3)).reshape((batch_size, n_spans, dim))

  # A span is valid only when its end index lies inside the passage.
  in_range = tt.lt(tt.shape_padleft(flat_end_idxs), tt.shape_padright(p_lens))
  masks = cast_floatX(in_range)

  return sums, masks
 # NOTE(review): this excerpt is truncated -- the bodies of the `for` loops and
 # of the `if` branch are missing, so the block is not valid Python as shown.
 # Recover the missing statements from the original source before use.
 def create_prediction(self):#method that performs a single prediction pass
     for i in xrange(1,7):#first 6 iterations (0-5): run the 6 frames that precede the output; the 7th iteration then yields the 1st output
     if self.steps > 1:
         for i in xrange(2,self.steps):
             #need T.shape_padright???
     return self.results
def maxpool_3D(input, ds, ignore_border=False):
    """Max-pool the last three dimensions (time, rows, cols) of a tensor.

    The pooling is done in two passes so that the 2D ``DownsampleFactorMax``
    op can be reused: first over (rows, cols), then over time.

    :param input: N-D Theano tensor with N >= 3; the last three dimensions
        are treated as (time, rows, cols).
    :param ds: sequence of three pooling factors: ``ds[0]`` for time,
        ``ds[1]`` for rows and ``ds[2]`` for cols.
    :param ignore_border: forwarded unchanged to ``DownsampleFactorMax``.
    :returns: tensor with the same number of dimensions as ``input``.
    :raises NotImplementedError: if ``input`` has fewer than 3 dimensions.
    """
    if input.ndim < 3:
        raise NotImplementedError('max_pool_3d requires a dimension >= 3')

    vid_dim = input.ndim

    # --- pass 1: pool rows and cols -------------------------------------
    frame_shape = input.shape[-2:]
    # Collapse every leading dimension into a single batch dimension.
    batch_size = T.prod(input.shape[:-2])
    batch_size = T.shape_padright(batch_size, 1)
    # 4D shape (batch_size, 1, height, width). The singleton entry is needed:
    # the shape vector must have exactly `ndim=4` elements for the reshape.
    new_shape = T.cast(T.join(0, batch_size,
                              T.as_tensor([1]),
                              frame_shape), 'int32')
    input_4D = T.reshape(input, new_shape, ndim=4)

    # ds[1] and ds[2] are the height and width factors.
    op = DownsampleFactorMax((ds[1], ds[2]), ignore_border)
    output = op(input_4D)
    # Restore the original leading dimensions.
    outshape = T.join(0, input.shape[:-2], output.shape[-2:])
    out = T.reshape(output, outshape, ndim=input.ndim)

    # --- pass 2: pool time ----------------------------------------------
    # Move time to the last position: (..., rows, cols, time).
    shufl = (list(range(vid_dim-3)) + [vid_dim-2]+[vid_dim-1]+[vid_dim-3])
    input_time = out.dimshuffle(shufl)
    vid_shape = input_time.shape[-2:]
    batch_size = T.prod(input_time.shape[:-2])
    batch_size = T.shape_padright(batch_size, 1)
    # 4D shape (batch_size, 1, width, time), again with the singleton entry.
    new_shape = T.cast(T.join(0, batch_size,
                              T.as_tensor([1]),
                              vid_shape), 'int32')
    input_4D_time = T.reshape(input_time, new_shape, ndim=4)
    # Factor 1 leaves width untouched; ds[0] pools the time axis.
    op = DownsampleFactorMax((1, ds[0]), ignore_border)
    outtime = op(input_4D_time)

    # Restore (..., rows, cols, time), then move time back in front of rows.
    outshape = T.join(0, input_time.shape[:-2], outtime.shape[-2:])
    shufl = (list(range(vid_dim-3)) + [vid_dim-1]+[vid_dim-3]+[vid_dim-2])
    return T.reshape(outtime, outshape, ndim=input.ndim).dimshuffle(shufl)
def roc_curves(y_true, y_predicted):
    """Return ROC curves (fpr, tpr, thresholds) calculated along axis -1.

    The false/true positive counts from ``_binary_clf_curves`` are cast to
    float32 and divided by their entry at the index produced by
    ``_last_col_idx(y_true.ndim)`` (presumably the final, total count --
    confirm against that helper).
    """
    false_pos, true_pos, thresholds = _binary_clf_curves(y_true, y_predicted)
    final = _last_col_idx(y_true.ndim)

    # Re-add the axis dropped by the indexing so the division broadcasts.
    fp_total = T.shape_padright(false_pos[final], 1)
    tp_total = T.shape_padright(true_pos[final], 1)

    fpr = false_pos.astype('float32') / fp_total
    tpr = true_pos.astype('float32') / tp_total
    return fpr, tpr, thresholds
Exemple #5
    def __init__(self, n, p, *args, **kwargs):
        """Set up the multinomial parameters, mean and mode.

        :param n: number of trials; a scalar, an array or a tensor. It is
            squeezed and, for multi-dimensional distributions, broadcast to
            one value per row.
        :param p: event probabilities; renormalised to sum to one along the
            last axis.
        :param args: forwarded to the parent constructor.
        :param kwargs: forwarded to the parent constructor.
        """
        super(Multinomial, self).__init__(*args, **kwargs)

        p = p / tt.sum(p, axis=-1, keepdims=True)
        n = np.squeeze(n)  # works also if n is a tensor

        if len(self.shape) > 1:
            m = self.shape[-2]
            # Broadcast n to length m unless it already has that shape;
            # objects without a usable `.shape` take the broadcast path too.
            try:
                assert n.shape == (m,)
            except (AttributeError, AssertionError):
                n = n * tt.ones(m)
            self.n = tt.shape_padright(n)
            self.p = p if p.ndim > 1 else tt.shape_padleft(p)
        elif n.ndim == 1:
            self.n = tt.shape_padright(n)
            self.p = p if p.ndim > 1 else tt.shape_padleft(p)
        else:
            # n is a scalar, p is a 1d array
            self.n = tt.as_tensor_variable(n)
            self.p = tt.as_tensor_variable(p)

        self.mean = self.n * self.p
        mode = tt.cast(tt.round(self.mean), 'int32')
        # Rounding can make the mode sum differ from n; add the shortfall
        # back wherever it is non-zero.
        diff = self.n - tt.sum(mode, axis=-1, keepdims=True)
        inc_bool_arr = tt.abs_(diff) > 0
        # NOTE(review): the increment argument was cut off in this excerpt;
        # `diff[...]` is the reconstruction -- confirm against upstream.
        mode = tt.inc_subtensor(mode[inc_bool_arr.nonzero()],
                                diff[inc_bool_arr.nonzero()])
        self.mode = mode
def getTheanoSimilarityFunction():
    """Compile a Theano function scoring patches against filters.

    The compiled function takes ``(patches, filters, globalMean, globalStd)``,
    all float32:

    * ``patches``: 3D tensor, A x B x (patchsize**2)
    * ``filters``: matrix, C x (patchsize**2)
    * ``globalMean``, ``globalStd``: vectors applied along the last axis

    Each patch is standardised with its own mean and standard deviation,
    then with the global statistics. The result (A x B x C) is the dot
    product of every standardised patch with every filter, divided by the
    L2 norm of the standardised patch.

    :returns: the compiled ``theano.function``.
    """
    # Define the input variables to the function
    patches = T.tensor3(dtype='float32')  # AxBx(patchsize**2)
    filters = T.matrix(dtype='float32')   # Cx(patchsize**2)
    globalMean = T.vector(dtype='float32')
    globalStd = T.vector(dtype='float32')

    # Perform canonical processing of the patches.
    mean = T.shape_padright(patches.mean(2), n_ones=1)
    # 1e-6 keeps the division defined for constant patches.
    std = T.shape_padright(patches.std(2) + 1e-6, n_ones=1)
    canonicalPatches_ = (patches - mean) / std
    canonicalPatches = (canonicalPatches_ - globalMean) / globalStd

    # Compute the similarities between each patch and each filter
    similarities = T.tensordot(canonicalPatches, filters, axes=[[2], [1]])  # AxBxC
    normFactor = ((canonicalPatches ** 2).sum(2) ** 0.5)
    normFactorPadded = T.shape_padright(normFactor, n_ones=1)
    # Normalize the similarities by the norm of the patches
    similaritiesNorm = (similarities / normFactorPadded)

    # Compile and return the theano function
    f = theano.function([patches, filters, globalMean, globalStd],
                        similaritiesNorm, on_unused_input='ignore')
    return f
Exemple #7
    def sym_mask_logdensity_estimator_intermediate(self, x, mask):
        """Symbolic masked log-density estimate plus its intermediate values.

        ``x`` and ``mask`` are transposed on entry (to BxD, per the original
        annotations). The masked input and the mask itself feed the first
        hidden layer; the last hidden layer produces mixture parameters for
        every dimension. Only dimensions where ``mask`` is zero contribute to
        the log-density, which is rescaled by ``D / (D - d)`` with ``d`` the
        per-row sum of the mask.

        Returns the tuple
        ``(logdensity, z_alpha, z_mu, z_sigma, Alpha, Mu, Sigma, h)``.
        """
        non_linearity_name = self.parameters["nonlinearity"].get_name()
        assert non_linearity_name in ("sigmoid", "RLU")

        x = x.T  # BxD
        mask = mask.T  # BxD
        unobserved = constantX(1) - mask  # BxD
        n_dims = constantX(self.n_visible)
        n_observed = mask.sum(1)

        # First layer sees the masked input together with the mask flags.
        first_pre_act = T.dot(x * mask, self.W1) + T.dot(mask, self.Wflags) + self.b1
        h = self.nonlinearity(first_pre_act)  # BxH
        for layer in xrange(self.n_layers - 1):
            h = self.nonlinearity(T.dot(h, self.Ws[layer]) + self.bs[layer])  # BxH

        def output_pre_activation(V, b):
            # Contract the hidden axis of h with axis 1 of V, then add the bias.
            return T.tensordot(h, V, [[1], [1]]) + T.shape_padleft(b)

        z_alpha = output_pre_activation(self.V_alpha, self.b_alpha)
        z_mu = output_pre_activation(self.V_mu, self.b_mu)
        z_sigma = output_pre_activation(self.V_sigma, self.b_sigma)

        # Normalise the mixing weights over the component axis.
        exp_alpha = T.exp(z_alpha)
        Alpha = exp_alpha / T.shape_padright(exp_alpha.sum(2))  # BxDxC
        Mu = z_mu  # BxDxC
        Sigma = T.exp(z_sigma)  # BxDxC

        # Per-component Gaussian log-density of x.
        standardised = (Mu - T.shape_padright(x)) / Sigma
        Phi = (
            -constantX(0.5) * T.sqr(standardised)
            - T.log(Sigma)
            - constantX(0.5 * np.log(2 * np.pi))
        )  # BxDxC

        per_dim = log_sum_exp(Phi + T.log(Alpha), axis=2) * unobserved
        logdensity = per_dim.sum(1) * n_dims / (n_dims - n_observed)
        return (logdensity, z_alpha, z_mu, z_sigma, Alpha, Mu, Sigma, h)
 # NOTE(review): this excerpt is truncated -- each `self.model.forward(` call
 # below is missing its closing arguments/parenthesis, so the block is not
 # valid Python as shown. Recover the missing lines from the original source.
 def create_prediction(self):  # method that performs a single prediction pass
     gfs = self.gfs
     pm25in = self.pm25in
     # initial (first) forward pass
     self.layerstatus = self.model.forward(
         T.concatenate([gfs[:, 0], gfs[:, 1], gfs[:, 2], pm25in[:, 0], pm25in[:, 1], self.cnt[:, :, 0]], axis=1)
     # results.shape?40*1
     self.results = self.layerstatus[-1]
     if self.steps > 1:
         self.layerstatus = self.model.forward(
             T.concatenate([gfs[:, 1], gfs[:, 2], gfs[:, 3], pm25in[:, 1], self.results, self.cnt[:, :, 1]], axis=1),
         self.results = T.concatenate([self.results, self.layerstatus[-1]], axis=1)
         # after that, run the forward pass the remaining steps-2 times
         for i in xrange(2, self.steps):
             self.layerstatus = self.model.forward(
                         gfs[:, i],
                         gfs[:, i + 1],
                         gfs[:, i + 2],
                         T.shape_padright(self.results[:, i - 2]),
                         T.shape_padright(self.results[:, i - 1]),
                         self.cnt[:, :, i],
             # need T.shape_padright???
             self.results = T.concatenate([self.results, self.layerstatus[-1]], axis=1)
     return self.results
Exemple #9
 def _warp_times(self, t):
     """Shift the times ``t`` by per-entry offsets looked up in ``self.ttvs``.

     ``t`` is divided by ``self.period`` and offset by ``self._base_time``;
     the floor of that value, clipped to the valid range of each entry of
     ``self.ttvs``, selects the offset to add. The offsets are stacked on a
     new last axis and added to ``t`` padded with a matching trailing axis.
     """
     phase = tt.shape_padleft(t) / tt.shape_padright(self.period, t.ndim)
     phase = phase + tt.shape_padright(self._base_time, t.ndim)
     index = tt.cast(tt.floor(phase), "int64")

     offsets = []
     for i, ttv in enumerate(self.ttvs):
         # Clip so out-of-range indices reuse the first/last tabulated value.
         bounded = tt.clip(index[i], 0, ttv.shape[0]-1)
         offsets.append(ttv[bounded])

     return tt.shape_padright(t) + tt.stack(offsets, -1)
Exemple #10
    def prediction(self, h, bias):
        """Sample one ``(x, y, pen)`` row per case from the mixture parameters.

        A component is drawn per row from ``prop``; ``x`` and ``y`` are then
        sampled from that component's correlated bivariate normal and the pen
        flag by comparing ``bernoulli`` with a uniform draw. The random stream
        is seeded with 42.

        Returns the three sampled columns concatenated along axis 1.
        """
        srng = RandomStreams(seed=42)

        params = self.compute_parameters(h, bias)
        prop, mean_x, mean_y, std_x, std_y, rho, bernoulli = params

        # Index of the sampled component in each row.
        component = T.argmax(srng.multinomial(pvals=prop, dtype=prop.dtype), axis=1)
        rows = T.arange(0, mean_x.shape[0])

        def pick(values):
            # Select each row's entry for its sampled component.
            return values[rows, component]

        m_x, m_y = pick(mean_x), pick(mean_y)
        s_x, s_y = pick(std_x), pick(std_y)
        r = pick(rho)

        gauss = srng.normal((h.shape[0], 2))
        eps_x = gauss[:, 0]
        eps_y = gauss[:, 1]

        # Correlate the second draw with the first through rho.
        correlated = eps_x * r + eps_y * T.sqrt(1.-r**2)
        x_n = T.shape_padright(m_x + s_x * eps_x)
        y_n = T.shape_padright(m_y + s_y * correlated)

        threshold = srng.uniform((h.shape[0],))
        pin = T.shape_padright(T.cast(bernoulli > threshold, floatX))

        return T.concatenate([x_n, y_n, pin], axis=1)
Exemple #11
 # NOTE(review): this excerpt is truncated -- the docstring below is never
 # closed and the conv2d(...) call is cut off, so the block is not valid Python
 # as shown. Recover the missing lines from the original source before use.
 def filter_spike_train(n,S,taus):
     """ Helper function to filter the spike train
     filt = T.shape_padright(filt_fn(taus[n]), n_ones=1)
     filtered_S = conv2d(T.shape_padright(S[:,n], n_ones=1), 
     return filtered_S[0,:,0]
Exemple #12
 def __call__(self, crf, X, Y, mask=None, flank=0):
     """Decode with ``crf`` and return the loss and confusion tensors.

     When ``mask`` is given, both results are multiplied by it (padded with
     one trailing axis for the loss, two for the confusion tensor). ``flank``
     leading and trailing entries along axis 0 are dropped from both results.
     """
     decoded = self.decode(crf, X, Y)
     losses = self.loss(decoded, Y)
     conf_mat = confusion(T.argmax(decoded,axis=-1), Y, decoded.shape[-1])

     if mask is not None:
         losses *= T.shape_padright(mask)
         conf_mat *= T.shape_padright(T.shape_padright(mask))

     stop = decoded.shape[0] - flank
     return losses[flank:stop], conf_mat[flank:stop]
 def dfe_dlhat(self, g_hat, h_hat, l_hat, v):
     """Return the sum of the loss term and the entropy term for ``l_hat``.

     Both terms have the form ``q - l_hat * sum(q, axis=1)``, i.e. a quantity
     minus ``l_hat`` times its row sum. ``g_hat`` and ``v`` are accepted but
     not used here.
     """
     # Term from the loss function.
     dloss_dl = self.label_multiplier * (T.dot(h_hat, self.Whl) + self.lbias)
     loss_row_sum = T.shape_padright(T.sum(l_hat * dloss_dl, axis=1))
     loss_term = dloss_dl * l_hat - l_hat * loss_row_sum

     # Term from the entropy, -sum(l_hat * log(l_hat), axis=1).
     entropy_row_sum = T.shape_padright(
         T.sum(T.xlogx.xlogx(l_hat) + l_hat, axis=1))
     entropy_term = - T.xlogx.xlogx(l_hat) - l_hat + l_hat * entropy_row_sum

     return loss_term + entropy_term
Exemple #14
 def _theano_confusion(self, Yh, Y, mask):
     """Build a masked one-hot confusion tensor from scores and labels.

     The argmax of ``Yh`` over its last axis gives the predicted labels. The
     result has the shape of those predictions plus two trailing axes of size
     ``self.n_out``; it holds a 1 at ``[i, j, Y[i, j], prediction[i, j]]`` and
     is multiplied by ``mask`` padded with two trailing axes.
     """
     predicted = T.argmax(Yh, axis=-1)
     full_shape = list(predicted.shape) + [self.n_out, self.n_out]
     counts = T.zeros(full_shape, dtype='int64')

     # Index grids over the two leading axes, paired with the label tensors.
     rows, cols = T.mgrid[0:counts.shape[0], 0:counts.shape[1]]
     counts = T.set_subtensor(counts[rows,cols,Y,predicted], 1)

     padded_mask = T.shape_padright(T.shape_padright(mask))
     return counts*padded_mask
Exemple #15
        def density_given_previous_a_and_x(x, w, V_alpha, b_alpha, V_mu, b_mu, V_sigma, b_sigma, activations_factor, p_prev, a_prev, x_prev):
            """One recurrence step accumulating a Laplace-mixture log-density.

            The running activation ``a_prev`` is updated with the outer
            product of ``x_prev`` and ``w``; the resulting hidden state gives
            the mixture parameters, and the log-density of ``x`` under them is
            added to ``p_prev``. Returns ``(p, a, x)``.
            """
            a = a_prev + T.dot(T.shape_padright(x_prev, 1), T.shape_padleft(w, 1))
            h = self.nonlinearity(a * activations_factor)  # BxH

            def affine(V, b):
                return T.dot(h, V) + T.shape_padleft(b)

            Alpha = T.nnet.softmax(affine(V_alpha, b_alpha))  # BxC
            Mu = affine(V_mu, b_mu)  # BxC
            Sigma = T.exp((affine(V_sigma, b_sigma)))  # BxC

            # Weighted log-density of each Laplace component.
            abs_dev = T.abs_(Mu - T.shape_padright(x, 1))
            log_terms = T.log(Alpha) - T.log(2 * Sigma) - abs_dev / Sigma
            p = p_prev + log_sum_exp(log_terms)
            return (p, a, x)
Exemple #16
 def loss(self, X, mask=None, flank=0, Z=None):
     """Return the cross-entropy and confusion tensors for reconstructing X.

     If ``Z`` is not supplied it is computed by transforming a noised copy of
     ``X``. When ``mask`` is given, both results are multiplied by it (padded
     with one trailing axis for the loss, two for the confusion tensor).
     ``flank`` leading and trailing entries along axis 0 are dropped.
     """
     if Z is None:
         Z = self.transform(self.noise(X), mask=mask)

     emitted = self.emit(Z)
     xent = cross_entropy(emitted, X)
     conf_mat = confusion(T.argmax(emitted,axis=-1), X, emitted.shape[-1])

     if mask is not None:
         xent *= T.shape_padright(mask)
         conf_mat *= T.shape_padright(T.shape_padright(mask))

     stop = X.shape[0] - flank
     return xent[flank:stop], conf_mat[flank:stop]
Exemple #17
def max_pool_3d(input, ds, ignore_border=False):
	"""
	Takes as input a N-D tensor, where N >= 4. It downscales the input video by
	the specified factor, by keeping only the maximum value of non-overlapping
	patches of size (ds[0],ds[1],ds[2]) (time, height, width)

	:type input: N-D theano tensor of input images.
	:param input: input images. Height and width are the last two dimensions;
		time is the dimension at position ndim-4. The dimension at position
		ndim-3 is not pooled.
	:type ds: tuple of length 3
	:param ds: factor by which to downscale. (2,2,2) will halve the video in each dimension.
	:param ignore_border: boolean value. Example when True, (5,5,5) input with ds=(2,2,2) will generate a
		(2,2,2) output. (3,3,3) otherwise.
	:raises NotImplementedError: if the input has fewer than 4 dimensions.
	"""
	# The time axis sits at ndim-4, so a 3-D input cannot be handled: the
	# dimshuffle patterns below would have four entries.
	if input.ndim < 4:
		raise NotImplementedError('max_pool_3d requires a dimension >= 4')
	vid_dim = input.ndim

	# Maxpool frame
	frame_shape = input.shape[-2:]
	# Collapse every leading dimension into a single batch dimension.
	batch_size = T.prod(input.shape[:-2])
	batch_size = T.shape_padright(batch_size,1)
	# store as 4D tensor with shape: (batch_size,1,height,width)
	new_shape = T.cast(T.join(0, batch_size,T.as_tensor([1,]),frame_shape), 'int32')
	input_4D = T.reshape(input, new_shape, ndim=4)
	# downsample mini-batch of videos in rows and cols
	op = DownsampleFactorMax((ds[1],ds[2]), ignore_border)
	output = op(input_4D)
	# restore to original shape
	outshape = T.join(0, input.shape[:-2], output.shape[-2:])
	out = T.reshape(output, outshape, ndim=input.ndim)

	# Maxpool time
	# move the time axis (ndim-4) to the back
	shufl = (list(range(vid_dim-4)) + list(range(vid_dim-3,vid_dim))+[vid_dim-4])
	input_time = out.dimshuffle(shufl)
	# reset dimensions
	vid_shape = input_time.shape[-2:]
	batch_size = T.prod(input_time.shape[:-2])
	batch_size = T.shape_padright(batch_size,1)
	# store as 4D tensor with shape: (batch_size,1,width,time)
	new_shape = T.cast(T.join(0, batch_size,T.as_tensor([1,]),vid_shape), 'int32')
	input_4D_time = T.reshape(input_time, new_shape, ndim=4)
	# downsample mini-batch of videos in time; factor 1 leaves width untouched
	op = DownsampleFactorMax((1,ds[0]), ignore_border)
	outtime = op(input_4D_time)
	# restore the un-collapsed shape, then move time back to position ndim-4
	outshape = T.join(0, input_time.shape[:-2], outtime.shape[-2:])
	shufl = (list(range(vid_dim-4)) + [vid_dim-1] + list(range(vid_dim-4,vid_dim-1)))
	return T.reshape(outtime, outshape, ndim=input.ndim).dimshuffle(shufl)
Exemple #18
    # NOTE(review): this excerpt is truncated. The docstring below is never
    # closed, the `else:` lines that should separate the tied/untied sigma and
    # noise branches are missing, and the `kl = (` expression is not closed, so
    # the block is not valid Python as shown. `tensor.shape_padright(2)` also
    # looks like it lost its tensor argument -- confirm against the original.
    def apply(self, input_, application_call):
        """Apply the linear transformation followed by masking with noise.
        input_ : :class:`~tensor.TensorVariable`
            The input on which to apply the transformations
        output : :class:`~tensor.TensorVariable`
            The transformed input

        # When not in training mode, turn off noise
        if not self._training_mode:
            return input_

        if self.tied_sigma:
            average = tensor.shape_padright(self.flatten.apply(input_), 2)
            noise_level = (self.prior_noise_level -
                    tensor.clip(self.mask.apply(average), -16, 16))
            noise_level = tensor.patternbroadcast(noise_level,
                    (False, False, True, True))
            noise_level = copy_and_tag_noise(
                    noise_level, self, LOG_SIGMA, 'log_sigma')
            average = input_
            noise_level = (self.prior_noise_level -
                    tensor.clip(self.mask.apply(input_), -16, 16))
            noise_level = copy_and_tag_noise(
                    noise_level, self, LOG_SIGMA, 'log_sigma')
        # Allow incomplete batches by just taking the noise that is needed
        if self.tied_noise:
            if self.noise_batch_size is not None:
                noise = self.parameters[0][:input_.shape[0], :]
                noise = self.theano_rng.normal(input_.shape[0:2])
            noise = tensor.shape_padright(2)
            if self.noise_batch_size is not None:
                noise = self.parameters[0][:input_.shape[0], :, :, :]
                noise = self.theano_rng.normal(input_.shape)
        kl = (
            self.prior_noise_level - noise_level
            + 0.5 * (
                tensor.exp(2 * noise_level)
                + (average - self.prior_mean) ** 2
                ) / tensor.exp(2 * self.prior_noise_level)
            - 0.5
        application_call.add_auxiliary_variable(kl, roles=[NITS], name='nits')
        return input_ + self.noise_rate * tensor.exp(noise_level) * noise
Exemple #19
    def __init__(self, eta, cutpoints, *args, **kwargs):
        """Derive category probabilities from a predictor and cutpoints.

        :param eta: predictor; converted to a tensor variable.
        :param cutpoints: cutpoints separating the categories; converted to
            a tensor variable.
        :param args: forwarded to the parent constructor.
        :param kwargs: forwarded to the parent constructor.

        The cumulative probability at each cutpoint is
        ``sigmoid(cutpoint - eta)``. Bracketing those values with 0 and 1 and
        differencing adjacent columns gives the per-category probabilities
        ``p`` passed to the parent constructor.
        """
        self.eta = tt.as_tensor_variable(eta)
        self.cutpoints = tt.as_tensor_variable(cutpoints)

        pa = sigmoid(tt.shape_padleft(self.cutpoints) - tt.shape_padright(self.eta))
        # `pa` must sit between the 0 and 1 columns; without it p would not
        # depend on eta or the cutpoints at all.
        p_cum = tt.concatenate([
            tt.zeros_like(tt.shape_padright(pa[:, 0])),
            pa,
            tt.ones_like(tt.shape_padright(pa[:, 0]))
        ], axis=1)
        p = p_cum[:, 1:] - p_cum[:, :-1]

        super().__init__(p=p, *args, **kwargs)
Exemple #20
 def density_given_previous_a_and_x(x, w, V_alpha, b_alpha, V_mu, b_mu, V_sigma, b_sigma,activation_factor, p_prev, a_prev, x_prev,):
     """One recurrence step accumulating a Gaussian-mixture log-density.

     The running activation ``a_prev`` is updated with the outer product of
     ``x_prev`` and ``w``; the resulting hidden state gives the mixture
     parameters, and the log-density of ``x`` under them is added to
     ``p_prev``. Returns ``(p, a, x)``.
     """
     a = a_prev + T.dot(T.shape_padright(x_prev, 1), T.shape_padleft(w, 1))
     h = self.nonlinearity(a * activation_factor)  # BxH

     Alpha = T.nnet.softmax(T.dot(h, V_alpha) + b_alpha)  # BxC
     Mu = T.dot(h, V_mu) + b_mu  # BxC
     Sigma = T.exp(T.dot(h, V_sigma) + b_sigma)  # BxC

     # Per-component Gaussian log-density plus the log mixing weight.
     standardised = (Mu - T.shape_padright(x, 1)) / Sigma
     log_norm_const = constantX(0.5 * numpy.log(2 * numpy.pi))
     weighted_log_density = (
         -constantX(0.5) * T.sqr(standardised)
         - T.log(Sigma)
         - log_norm_const
         + T.log(Alpha)
     )
     p = p_prev + log_sum_exp(weighted_log_density)
     return (p, a, x)
Exemple #21
        def density_and_gradients(x_i, x_im1, w_i, V_alpha, b_alpha, V_mu, b_mu, V_sigma, b_sigma, activation_factor, a_i, lp_accum, dP_da_ip1):
            """One step of the Laplace-mixture density and its gradients.

            Computes the mixture log-density of ``x_i`` from the activation
            ``a_i`` and the batch-averaged gradients with respect to the
            output parameters, the activation factor and the input weights.

            Returns, in order: the activation with the contribution of
            ``x_im1`` removed, the updated log-probability accumulator, then
            ``gW, gb_alpha, gV_alpha, gb_mu, gV_mu, gb_sigma, gV_sigma, gfact``.
            """
            float_type = theano.config.floatX
            B = T.cast(x_i.shape[0], float_type)
            x_col = T.shape_padright(x_i, 1)

            pot = a_i * activation_factor
            h = self.nonlinearity(pot)  # BxH

            def affine(V, b):
                return T.dot(h, V) + T.shape_padleft(b)

            def batch_grads(dp_dz):
                # Bias gradient (mean over cases) and weight gradient.
                return dp_dz.mean(0, dtype=float_type), T.dot(h.T, dp_dz) / B

            z_alpha = affine(V_alpha, b_alpha)
            z_mu = affine(V_mu, b_mu)
            z_sigma = affine(V_sigma, b_sigma)

            Alpha = T.nnet.softmax(z_alpha)  # BxC
            Mu = z_mu  # BxC
            Sigma = T.exp(z_sigma)  # BxC

            # Weighted component log-densities, floored at -100.
            Phi = -T.log(2 * Sigma) - T.abs_(Mu - x_col) / Sigma
            wPhi = T.maximum(Phi + T.log(Alpha), constantX(-100.0))
            lp_current = log_sum_exp(wPhi)

            # Component responsibilities.
            Pi = T.exp(wPhi - T.shape_padright(lp_current, 1))

            dp_dz_alpha = Pi - Alpha  # BxC
            gb_alpha, gV_alpha = batch_grads(dp_dz_alpha)  # C, HxC

            dp_dz_mu = Pi * T.sgn(x_col - Mu) / Sigma
            gb_mu, gV_mu = batch_grads(dp_dz_mu)

            dp_dz_sigma = Pi * (T.abs_(x_col - Mu) / Sigma - 1)
            gb_sigma, gV_sigma = batch_grads(dp_dz_sigma)

            # Back-propagate through the output layer and the nonlinearity.
            dp_dh = T.dot(dp_dz_alpha, V_alpha.T) + T.dot(dp_dz_mu, V_mu.T) + T.dot(dp_dz_sigma, V_sigma.T)  # BxH
            if non_linearity_name == "sigmoid":
                dp_dpot = dp_dh * h * (1 - h)
            elif non_linearity_name == "RLU":
                dp_dpot = dp_dh * (pot > 0)

            gfact = (dp_dpot * a_i).sum(1).mean(0, dtype=float_type)  # 1

            dP_da_i = dP_da_ip1 + dp_dpot * activation_factor  # BxH
            gW = T.dot(T.shape_padleft(x_im1, 1), dP_da_i).flatten() / B

            a_im1 = a_i - T.dot(T.shape_padright(x_im1, 1), T.shape_padleft(w_i, 1))
            return (a_im1,
                    lp_accum + lp_current,
                    gW, gb_alpha, gV_alpha, gb_mu, gV_mu, gb_sigma, gV_sigma, gfact)
Exemple #22
        def density_and_gradients(x_i, x_im1, w_i, V_alpha, b_alpha, V_mu, b_mu, V_sigma, b_sigma, activation_factor, a_i, lp_accum, dP_da_ip1):
            """One step of the Gaussian-mixture density and its gradients.

            Computes the negative mixture log-likelihood of ``x_i`` from the
            activation ``a_i`` and the batch-averaged gradient terms for the
            output parameters, the activation factor and the input weights.

            Returns, in order: the activation with the contribution of
            ``x_im1`` removed, the accumulator plus this step's negative
            log-likelihood, then
            ``gW, gb_alpha, gV_alpha, gb_mu, gV_mu, gb_sigma, gV_sigma, gfact``.

            NOTE(review): the accumulator collects the *negative*
            log-likelihood while the gradient terms keep the signs used for
            the log-likelihood -- confirm the caller expects that.
            """
            B = T.cast(x_i.shape[0], floatX)
            pot = a_i * activation_factor
            h = self.nonlinearity(pot)  # BxH

            z_alpha = T.dot(h, V_alpha) + T.shape_padleft(b_alpha)
            z_mu = T.dot(h, V_mu) + T.shape_padleft(b_mu)
            z_sigma = T.dot(h, V_sigma) + T.shape_padleft(b_sigma)

            Alpha = T.nnet.softmax(z_alpha)  # BxC
            Mu = z_mu  # BxC
            Sigma = T.exp(z_sigma)  # BxC

            Phi = -constantX(0.5) * T.sqr((Mu - T.shape_padright(x_i, 1)) / Sigma) - T.log(Sigma) - constantX(0.5 * numpy.log(2 * numpy.pi))
            wPhi = T.maximum(Phi + T.log(Alpha), constantX(-100.0))

            lp_current = -log_sum_exp(wPhi)  # negative log likelihood

            # Responsibilities are exp(wPhi - logsumexp(wPhi)). lp_current is
            # the *negated* logsumexp, so it has to be added here; subtracting
            # it would give exp(wPhi + logsumexp), which is not normalised.
            Pi = T.exp(wPhi + T.shape_padright(lp_current, 1))
            dp_dz_alpha = Pi - Alpha  # BxC
            gb_alpha = dp_dz_alpha.mean(0, dtype=floatX)  # C
            gV_alpha = T.dot(h.T, dp_dz_alpha) / B  # HxC

            dp_dz_mu = -Pi * (Mu - T.shape_padright(x_i, 1)) / T.sqr(Sigma)
            dp_dz_mu = dp_dz_mu * Sigma  # Heuristic
            gb_mu = dp_dz_mu.mean(0, dtype=floatX)
            gV_mu = T.dot(h.T, dp_dz_mu) / B

            dp_dz_sigma = Pi * (T.sqr(T.shape_padright(x_i, 1) - Mu) / T.sqr(Sigma) - 1)
            gb_sigma = dp_dz_sigma.mean(0, dtype=floatX)
            gV_sigma = T.dot(h.T, dp_dz_sigma) / B

            # Back-propagate through the output layer and the nonlinearity.
            dp_dh = T.dot(dp_dz_alpha, V_alpha.T) + T.dot(dp_dz_mu, V_mu.T) + T.dot(dp_dz_sigma, V_sigma.T)  # BxH
            if self.hidden_act == "sigmoid":
                dp_dpot = dp_dh * h * (1 - h)
            elif self.hidden_act == "ReLU":
                dp_dpot = dp_dh * (pot > 0)

            gfact = (dp_dpot * a_i).sum(1).mean(0, dtype=floatX)  # 1

            dP_da_i = dP_da_ip1 + dp_dpot * activation_factor  # BxH
            gW = T.dot(T.shape_padleft(x_im1, 1), dP_da_i).flatten() / B

            return (a_i - T.dot(T.shape_padright(x_im1, 1), T.shape_padleft(w_i, 1)),
                    lp_accum + lp_current,
                    gW, gb_alpha, gV_alpha, gb_mu, gV_mu, gb_sigma, gV_sigma, gfact)
Exemple #23
 # NOTE(review): this excerpt is truncated -- the `for` loop has no body other
 # than a comment, so the block is not valid Python as shown. Recover the
 # missing statements from the original source before use.
 def create_prediction(self):#method that performs a single prediction pass
     if self.steps > 1:
         for i in xrange(2,self.steps):
             #need T.shape_padright???
     return self.results
    def decode_to_probs(self, activations, relative_position, low_bound, high_bound):
        """Turn raw encoding activations into a probability vector.

        The last axis of ``activations`` is split into four softmax groups:
        columns 0-2, 3-6, 7-9 and the remainder (one entry per octave). The
        second and third groups are combined into 12 per-octave note
        probabilities, spread over the octaves by the fourth group and scaled
        by the third entry of the first group. The first two entries of the
        first group are kept as-is in front.

        Returns a tensor shaped like ``activations`` with the last axis
        replaced by ``2 + high_bound - low_bound``. ``relative_position`` is
        not used here.
        """
        assert (low_bound%12==0) and (high_bound-low_bound == self.num_octaves*12), "Circle of thirds must evenly divide into octaves"
        flat = T.reshape(activations, (-1,self.RAW_ENCODING_WIDTH))

        head = T.nnet.softmax(flat[:,:3])
        group_of_4 = T.nnet.softmax(flat[:,3:7])
        group_of_3 = T.nnet.softmax(flat[:,7:10])
        octave_probs = T.nnet.softmax(flat[:,10:])

        # Tiling 4 entries 3 times and 3 entries 4 times yields 12 products.
        notes_in_octave = T.tile(group_of_4,(1,3)) * T.tile(group_of_3,(1,4))
        per_octave = T.shape_padright(octave_probs) * T.shape_padaxis(notes_in_octave, 1)
        all_notes = T.reshape(per_octave, (-1,12*self.num_octaves))

        note_block = T.shape_padright(head[:,2])*all_notes
        flat_probs = T.concatenate([head[:,:2], note_block], 1)

        # Restore the leading dimensions of the input.
        out_shape = T.concatenate([activations.shape[:-1],[2+high_bound-low_bound]],0)
        return T.reshape(flat_probs, out_shape, ndim=activations.ndim)
Exemple #25
def max_pool_3d(input, ds, ignore_border=False):
    """Max-pool the last three dimensions of a 5D tensor [n, c, x, y, z].

    Pooling runs in two passes with the 2D ``DownsampleFactorMax`` op: first
    over the last two dimensions with factors ``(ds[1], ds[2])``, then over
    the third-from-last dimension with factor ``ds[0]``.

    :param input: 5D Theano tensor laid out as [n, c, x, y, z].
    :param ds: sequence of three pooling factors.
    :param ignore_border: forwarded unchanged to ``DownsampleFactorMax``.
    :returns: pooled 5D tensor.
    :raises NotImplementedError: if ``input`` is not 5-dimensional.
    """
    # Inputs other than [n, c, x, y, z] are not accepted.
    if input.ndim != 5:
        raise NotImplementedError(
            'max_pool_3d requires a input [n, c, x, y, z]')

    # Number of input dimensions.
    vid_dim = input.ndim

    # Shape of the [y, z] frame.
    frame_shape = input.shape[-2:]

    # Batch size: the product of the sizes of every non-frame dimension.
    batch_size = T.prod(input.shape[:-2])
    # http://deeplearning.net/software/theano/library/tensor/basic.html#theano.tensor.shape_padright
    batch_size = T.shape_padright(batch_size, 1)

    # 4D shape (batch_size, 1, y, z); cast to int32 as in the second pass.
    new_shape = T.cast(T.join(0, batch_size, T.as_tensor([1, ]), frame_shape),
                       'int32')
    input_4D = T.reshape(input, new_shape, ndim=4)

    op = DownsampleFactorMax((ds[1], ds[2]), ignore_border)
    output = op(input_4D)
    outshape = T.join(0, input.shape[:-2], output.shape[-2:])
    out = T.reshape(output, outshape, ndim=input.ndim)

    # Move the x axis to the back so the 2D op can pool it: (..., y, z, x).
    shufl = (
        list(range(vid_dim - 3)) + [vid_dim - 2] + [vid_dim - 1] + [
            vid_dim - 3])
    input_time = out.dimshuffle(shufl)
    vid_shape = input_time.shape[-2:]

    batch_size = T.prod(input_time.shape[:-2])
    batch_size = T.shape_padright(batch_size, 1)

    new_shape = T.cast(T.join(0, batch_size,
                              T.as_tensor([1, ]),
                              vid_shape), 'int32')
    input_4D_time = T.reshape(input_time, new_shape, ndim=4)
    # Factor 1 leaves z untouched; ds[0] pools the moved axis.
    op = DownsampleFactorMax((1, ds[0]), ignore_border)
    outtime = op(input_4D_time)
    outshape = T.join(0, input_time.shape[:-2], outtime.shape[-2:])
    # Undo the earlier shuffle.
    shufl = (
        list(range(vid_dim - 3)) + [vid_dim - 1] + [vid_dim - 3] + [
            vid_dim - 2])
    return T.reshape(outtime, outshape, ndim=input.ndim).dimshuffle(shufl)
Exemple #26
def _create_maximum_activation_update(output, record, streamindex, topn):
    """
    Calculates update of the topn maximums for one batch of outputs.

    :param output: activations for the batch; a 2d tensor when ``dims`` has
        length 1, otherwise a 4d tensor.
    :param record: tuple ``(dims, maximums, indices, snapshot)``.
    :param streamindex: offset added to the per-batch case counter.
    :param topn: number of maximums to keep.
    :returns: list of ``(variable, new_value)`` update pairs for
        ``maximums``, ``indices`` and, when truthy, ``snapshot``.
    """
    dims, maximums, indices, snapshot = record
    counters = tensor.tile(tensor.shape_padright(
        tensor.arange(output.shape[0]) + streamindex), (1, output.shape[1]))
    if len(dims) == 1:
        # output is a 2d tensor, (cases, units) -> activation
        tmax = output
        # counters is a 2d tensor broadcastable (cases, units) -> case_index
        tind = counters
    else:
        # output is a 4d tensor: fmax flattens it to 3d
        fmax = output.flatten(ndim=3)
        # fargmax is a 2d tensor containing rolled maximum locations
        fargmax = fmax.argmax(axis=2)
        # fetch the maximum. tmax is 2d, (cases, units) -> activation
        tmax = _apply_index(fmax, fargmax, axis=2)
        # targmax is a tuple that separates rolled-up location into (x, y)
        targmax = divmod(fargmax, dims[2])
        # tind is a 3d tensor (cases, units, 3) -> case_index, maxloc
        # this will match indices which is a 3d tensor also
        tind = tensor.stack((counters, ) + targmax, axis=2)
    # Merge the stored maximums with this batch and keep the topn largest.
    cmax = tensor.concatenate((maximums, tmax), axis=0)
    cind = tensor.concatenate((indices, tind), axis=0)
    cargsort = (-cmax).argsort(axis=0)[:topn]
    newmax = _apply_perm(cmax, cargsort, axis=0)
    newind = _apply_perm(cind, cargsort, axis=0)
    updates = [(maximums, newmax), (indices, newind)]
    if snapshot:
        csnap = tensor.concatenate((snapshot, output), axis=0)
        newsnap = _apply_perm(csnap, cargsort, axis=0)
        updates.append((snapshot, newsnap))
    return updates
Exemple #27
    def predict_all(self, latitude, longitude, latitude_mask, **kwargs):
        """Run one recurrent step and return the de-normalised prediction.

        The coordinates are standardised with ``data.train_gps_mean`` and
        ``data.train_gps_std``, joined with the pre-context embeddings and fed
        through the recurrent layer for a single step (``iterate=False``),
        starting from ``kwargs['states']`` and ``kwargs['cells']``. The output
        layer sees the post-context embeddings and the new states.

        Returns ``(rto, next_states, next_cells)``.
        """
        gps_mean = data.train_gps_mean
        gps_std = data.train_gps_std
        latitude = (latitude - gps_mean[0]) / gps_std[0]
        longitude = (longitude - gps_mean[1]) / gps_std[1]

        pre_emb = tuple(self.pre_context_embedder.apply(**kwargs))
        # Give each coordinate a trailing axis so it can be concatenated.
        coords = (tensor.shape_padright(latitude), tensor.shape_padright(longitude))
        rec_input = tensor.concatenate(pre_emb + coords, axis=1)
        itr = self.input_to_rec.apply(rec_input)
        itr = itr.repeat(4, axis=1)
        (next_states, next_cells) = self.rec.apply(
            itr, kwargs['states'], kwargs['cells'],
            mask=latitude_mask, iterate=False)

        post_emb = tuple(self.post_context_embedder.apply(**kwargs))
        out_input = tensor.concatenate(post_emb + (next_states,), axis=1)
        rto = self.rec_to_output.apply(out_input)

        # Undo the standardisation on the predicted coordinates.
        rto = (rto * gps_std) + gps_mean
        return (rto, next_states, next_cells)
Exemple #28
 def cost(self):
   """
   Build the training cost selected by ``self.loss``.

   Handled values are 'ce', 'priori', 'entropy' and 'sse'; any other value
   trips an assertion.

   :rtype: (theano.Variable | None, dict[theano.Variable,theano.Variable] | None)
   :returns: cost, known_grads
   """
   known_grads = None
   if self.loss == 'ce' or self.loss == 'priori':
     if self.attrs.get("target", "").endswith("[sparse:coo]"):
       assert isinstance(self.y, tuple)
       assert len(self.y) == 3
       from NativeOp import crossentropy_softmax_and_gradient_z_sparse
       y_mask = self.network.j[self.attrs.get("target", "").replace("[sparse:coo]", "[sparse:coo:2:0]")]
       ce, grad_z = crossentropy_softmax_and_gradient_z_sparse(
         self.z, self.index, self.y[0], self.y[1], self.y[2], y_mask)
       return self.norm * T.sum(ce), {self.z: grad_z}
     if self.y_data_flat.type == T.ivector().type:
       # Use crossentropy_softmax_1hot to have a more stable and more optimized gradient calculation.
       # Theano fails to use it automatically; I guess our self.i indexing is too confusing.
       nll, pcx = T.nnet.crossentropy_softmax_1hot(x=self.y_m[self.i], y_idx=self.y_data_flat[self.i])
     else:
       # Targets are not an int vector: take the dot product with the clipped log-probabilities.
       nll = -T.dot(T.log(T.clip(self.p_y_given_x[self.i], 1.e-38, 1.e20)), self.y_data_flat[self.i].T)
     return self.norm * T.sum(nll), known_grads
   elif self.loss == 'entropy':
     h_e = T.exp(self.y_m) #(TB)
     pcx = T.clip((h_e / T.sum(h_e, axis=1, keepdims=True)).reshape((self.index.shape[0],self.index.shape[1],self.attrs['n_out'])), 1.e-6, 1.e6) # TBD
     ee = -T.sum(pcx[self.i] * T.log(pcx[self.i])) # TB
     nll, _ = T.nnet.crossentropy_softmax_1hot(x=self.y_m, y_idx=self.y_data_flat) # TB
     ce = nll.reshape(self.index.shape) * self.index # TB
     y = self.y_data_flat.reshape(self.index.shape) * self.index # TB
     f = T.any(T.gt(y,0), axis=0) # B
     # Cross-entropy where any masked target is > 0 (f), the entropy term otherwise.
     return T.sum(f * T.sum(ce, axis=0) + (1-f) * T.sum(ee, axis=0)), known_grads
   elif self.loss == 'priori':
     # NOTE(review): unreachable -- 'priori' is already handled by the first branch.
     pcx = self.p_y_given_x[self.i, self.y_data_flat[self.i]]
     pcx = T.clip(pcx, 1.e-38, 1.e20)  # For pcx near zero, the gradient will likely explode.
     return -T.sum(T.log(pcx)), known_grads
   elif self.loss == 'sse':
     if self.y_data_flat.dtype.startswith('int'):
       # Integer targets: compare against a one-hot encoding.
       y_f = T.cast(T.reshape(self.y_data_flat, (self.y_data_flat.shape[0] * self.y_data_flat.shape[1]), ndim=1), 'int32')
       y_oh = T.eq(T.shape_padleft(T.arange(self.attrs['n_out']), y_f.ndim), T.shape_padright(y_f, 1))
       return T.mean(T.sqr(self.p_y_given_x[self.i] - y_oh[self.i])), known_grads
     else:
       return T.sum(T.sqr(self.y_m[self.i] - self.y_data_flat.reshape(self.y_m.shape)[self.i])), known_grads
   else:
     assert False, "unknown loss: %s" % self.loss
Exemple #29
def max_pool_2d(input, ds, ignore_border=False, st=None, padding=(0, 0),
    # NOTE(review): the rest of the signature and the docstring quotes are
    # missing from this excerpt; the lines below are the docstring text.
    Takes as input a N-D tensor, where N >= 2. It downscales the input image by
    the specified factor, by keeping only the maximum value of non-overlapping
    patches of size (ds[0],ds[1])

    :type input: N-D theano tensor of input images.
    :param input: input images. Max pooling will be done over the 2 last
    :type ds: tuple of length 2
    :param ds: factor by which to downscale (vertical ds, horizontal ds).
        (2,2) will halve the image in each dimension.
    :type ignore_border: bool
    :param ignore_border: When True, (5,5) input with ds=(2,2)
        will generate a (2,2) output. (3,3) otherwise.
    :type st: tuple of lenght 2
    :param st: stride size, which is the number of shifts
        over rows/cols to get the the next pool region.
        if st is None, it is considered equal to ds
        (no overlap on pooling regions)
    :param padding: (pad_h, pad_w), pad zeros to extend beyond four borders
            of the images, pad_h is the size of the top and bottom margins,
            and pad_w is the size of the left and right margins.
    :type padding: tuple of two ints
    :param mode: 'max', 'average_inc_pad' or 'average_exc_pad'.
        Operation executed on each window.  `max` always excludes the padding
        in the computation. `average` gives you the choice to include or
        exclude it.
    :type mode: string
    # Pooling acts on the last two axes, so anything below 2-D is rejected.
    if input.ndim < 2:
        raise NotImplementedError('max_pool_2d requires a dimension >= 2')
    # A 4-D input is handed to the op directly, without any reshaping.
    if input.ndim == 4:
        # NOTE(review): this call is cut off in this excerpt.
        op = DownsampleFactorMax(ds, ignore_border, st=st, padding=padding,
        output = op(input)
        return output

    # extract image dimensions
    img_shape = input.shape[-2:]

    # collapse all "leading" dimensions into one batch size, kept as a
    # length-1 vector so it can be joined with the other shape pieces
    batch_size = tensor.prod(input.shape[:-2])
    batch_size = tensor.shape_padright(batch_size, 1)

    # store as 4D tensor with shape: (batch_size,1,height,width)
    # NOTE(review): only batch_size and img_shape are joined here (3 entries)
    # while the reshape asks for ndim=4 -- the singleton entry looks missing.
    new_shape = tensor.cast(tensor.join(0, batch_size,
                                        img_shape), 'int64')
    input_4D = tensor.reshape(input, new_shape, ndim=4)

    # downsample mini-batch of images
    # NOTE(review): this call is cut off in this excerpt as well.
    op = DownsampleFactorMax(ds, ignore_border, st=st, padding=padding,
    output = op(input_4D)

    # restore to original shape: original leading dims + pooled last two dims
    outshp = tensor.join(0, input.shape[:-2], output.shape[-2:])
    return tensor.reshape(output, outshp, ndim=input.ndim)
Exemple #30
 def one_hot(self,t, r=None):
     """Return a one-hot encoding of ``t`` along a new trailing axis.

     :param t: tensor of class indices.
     :param r: number of classes; when ``None`` it is taken as ``max(t) + 1``.
     :return: tensor with one more axis than ``t``, cast to
         ``theano.config.floatX``, that is 1 where the class id equals the
         entry of ``t`` and 0 elsewhere.
     """
     n_classes = T.max(t) + 1 if r is None else r
     # Class ids get ``t.ndim`` leading broadcastable axes, ``t`` gets one
     # trailing axis, so the comparison broadcasts to t.shape + (n_classes,).
     class_ids = T.shape_padleft(T.arange(n_classes), t.ndim)
     hits = T.eq(class_ids, T.shape_padright(t, 1))
     return T.cast(hits, dtype=theano.config.floatX)
Exemple #31
    def log_likelihood(self, X, Y=None, n_samples=None):
        """Estimate log P(X) from samples of the q-layers, plus diagnostics.

        :param X: batch of inputs; ``X.shape[0]`` is used as the batch size.
        :param Y: not used by this method.
        :param n_samples: samples per datapoint; ``self.n_samples`` is used
            when this is ``None``.
        :return: tuple ``(log_px, w, log_p_all, log_q_all, KL, Hp, Hq)`` where
            ``log_px`` is the per-datapoint estimate, ``w`` the normalised
            sampling weights, ``log_p_all`` / ``log_q_all`` the per-sample
            log-probabilities summed over layers, and ``KL``, ``Hp``, ``Hq``
            are per-layer lists.
        """
        p_layers = self.p_layers
        n_layers = len(p_layers)

        # Identity test: `== None` is the wrong tool for a None check.
        if n_samples is None:
            n_samples = self.n_samples

        batch_size = X.shape[0]

        # Get samples
        # (f_replicate_batch is defined elsewhere; presumably repeats each
        # datapoint n_samples times -- confirm against its definition.)
        X = f_replicate_batch(X, n_samples)
        samples, log_p, log_q = self.sample_q(X, None)

        # Reshape to (batch_size, n_samples) and sum over layers
        log_p_all = T.zeros((batch_size, n_samples))
        log_q_all = T.zeros((batch_size, n_samples))
        for l in xrange(n_layers):
            samples[l] = samples[l].reshape(
                (batch_size, n_samples, p_layers[l].n_X))
            log_q[l] = log_q[l].reshape((batch_size, n_samples))
            log_p[l] = log_p[l].reshape((batch_size, n_samples))
            log_p_all += log_p[l]  # aggregate all layers
            log_q_all += log_q[l]  # aggregate all layers

        # Approximate log P(X): log-mean-exp of (log p - log q) over samples
        log_px = f_logsumexp(log_p_all - log_q_all, axis=1) - T.log(n_samples)

        # Calculate sampling weights, normalised over the sample axis
        log_pq = (log_p_all - log_q_all - T.log(n_samples))
        w_norm = f_logsumexp(log_pq, axis=1)
        log_w = log_pq - T.shape_padright(w_norm)
        w = T.exp(log_w)

        # Calculate KL(P|Q), Hp, Hq for every layer
        KL = [None] * n_layers
        Hp = [None] * n_layers
        Hq = [None] * n_layers
        for l in xrange(n_layers):
            KL[l] = T.sum(w * (log_p[l] - log_q[l]), axis=1)
            Hp[l] = f_logsumexp(log_w + log_p[l], axis=1)
            Hq[l] = T.sum(w * log_q[l], axis=1)

        return log_px, w, log_p_all, log_q_all, KL, Hp, Hq
Exemple #32
    def queue_transform(feature_strengths,
        # NOTE(review): the rest of the signature and the docstring quotes are
        # missing from this excerpt; the lines below are the docstring text.
        Process features according to a "fragmented queue", where each timestep
        gets a size-1 window onto a feature queue. Effectively,
            feature_strengths gives how much to push onto queue
            feature_vects gives what to push on
            pop weights are tied to feature_strengths
            output is a size-1 peek (without popping)

            - feature_strengths: float32 tensor of shape (batch, push_timestep) in [0,1]
            - feature_vects: float32 tensor of shape (batch, push_timestep, feature_dim)

            - peek_vects: float32 tensor of shape (batch, timestep, feature_dim)
        n_batch, n_time, n_feature = feature_vects.shape

        # Running total of the push strengths along axis 1.
        cum_sum_str = T.extra_ops.cumsum(feature_strengths, 1)

        # We will be working in (batch, timestep, push_timestep)
        # For each timestep, if we subtract out the sum of pushes before that timestep
        # and then cap to 0-1 we get the cumsums for just the features active in that
        # timestep
        # NOTE(review): the expression on the next line is cut off in this excerpt.
        timestep_adjustments = T.shape_padright(cum_sum_str -
        # Insert a broadcastable axis at position 1: (batch, 1, push_timestep).
        push_time_cumsum = T.shape_padaxis(cum_sum_str, 1)
        relative_cumsum = push_time_cumsum - timestep_adjustments
        # Clamp into [0, 1].
        capped_cumsum = T.minimum(T.maximum(relative_cumsum, 0), 1)

        # Now we can recover the peek strengths by taking a diff
        # (prepend a zero column and drop the last one, then subtract).
        shifted = T.concatenate(
            [T.zeros((n_batch, n_time, 1)), capped_cumsum[:, :, :-1]], 2)
        peek_strengths = capped_cumsum - shifted
        # Peek strengths is now (batch, timestep, push_timestep)

        # Batched matrix product of the peek strengths with the feature vectors.
        result = T.batched_dot(peek_strengths, feature_vects)

        # NOTE(review): as written the second return is unreachable; an
        # `else:` line appears to be missing from this excerpt.
        if return_strengths:
            return peek_strengths, result
            return result
Exemple #33
    def take_look(self,
        r"""Compute attention weights and produce glimpses.

        sequence : :class:`~tensor.TensorVariable`
            The sequence, time is the 1-st dimension.
        preprocessed_sequence : :class:`~tensor.TensorVariable`
            The preprocessed sequence. If ``None``, is computed by calling
        mask : :class:`~tensor.TensorVariable`
            A 0/1 mask specifying available data. 0 means that the
            corresponding sequence element is fake.
            The states of the agent.

        glimpses : theano variable
            Linear combinations of sequence elements with the attention
        weights : theano variable
            The attention weights. The first dimension is batch, the second
            is time.

        # NOTE(review): the rest of the signature, parts of the docstring and
        # its closing quotes are missing from this excerpt.
        # NOTE(review): this and `if mask:` below are truthiness tests on what
        # the docstring describes as tensor variables; `is None` may be what
        # is intended -- confirm.
        if not preprocessed_sequence:
            preprocessed_sequence = self.preprocess(sequence)
        # NOTE(review): this call is cut off in this excerpt.
        transformed_states = self.state_transformers.apply(return_dict=True,
        # Broadcasting of transformed states should be done automatically
        # (the preprocessed sequence is the start value of the sum)
        match_vectors = sum(transformed_states.values(), preprocessed_sequence)
        # Reshape the energies to the match vectors' shape minus the last axis.
        energies = self.energy_computer.apply(match_vectors).reshape(
            match_vectors.shape[:-1], ndim=match_vectors.ndim - 1)
        unormalized_weights = tensor.exp(energies)
        # Zero out the weights of masked elements before normalising.
        if mask:
            unormalized_weights *= mask
        # Normalise over axis 0.
        weights = unormalized_weights / unormalized_weights.sum(axis=0)
        # Weighted sum of the sequence elements over axis 0.
        glimpses = (tensor.shape_padright(weights) * sequence).sum(axis=0)
        # The weights are returned with their two axes swapped.
        return glimpses, weights.dimshuffle(1, 0)
Exemple #34
def max_pool_2d(input, ds, ignore_border=False, st=None):
    """
    Takes as input a N-D tensor, where N >= 2. It downscales the input image by
    the specified factor, by keeping only the maximum value of non-overlapping
    patches of size (ds[0],ds[1])

    :type input: N-D theano tensor of input images.
    :param input: input images. Max pooling will be done over the 2 last
        dimensions.
    :type ds: tuple of length 2
    :param ds: factor by which to downscale (vertical ds, horizontal ds).
        (2,2) will halve the image in each dimension.
    :type ignore_border: bool
    :param ignore_border: When True, (5,5) input with ds=(2,2)
        will generate a (2,2) output. (3,3) otherwise.
    :type st: tuple of length 2
    :param st: stride size, which is the number of shifts
        over rows/cols to get the next pool region.
        if st is None, it is considered equal to ds
        (no overlap on pooling regions)
    :raises NotImplementedError: if ``input`` has fewer than 2 dimensions.
    :return: tensor with the same number of dimensions as ``input`` whose
        last two dimensions have been pooled.
    """
    if input.ndim < 2:
        raise NotImplementedError('max_pool_2d requires a dimension >= 2')

    # extract image dimensions
    img_shape = input.shape[-2:]

    # collapse all "leading" dimensions into one batch size, kept as a
    # length-1 vector so it can be joined with the other shape pieces
    batch_size = tensor.prod(input.shape[:-2])
    batch_size = tensor.shape_padright(batch_size, 1)

    # store as 4D tensor with shape: (batch_size,1,height,width)
    new_shape = tensor.cast(
        tensor.join(0, batch_size, tensor.as_tensor([1]), img_shape), 'int64')
    input_4D = tensor.reshape(input, new_shape, ndim=4)

    # downsample mini-batch of images
    op = DownsampleFactorMax(ds, ignore_border, st=st)
    output = op(input_4D)

    # restore to original shape: original leading dims + pooled last two dims
    outshp = tensor.join(0, input.shape[:-2], output.shape[-2:])
    return tensor.reshape(output, outshp, ndim=input.ndim)
    def get_means_and_covs(self, X, X_embedded):
        """ Get the mean and the covariance for the distribution for the code z

        :param X:               (N x max(L)) matrix representing the text
        :param X_embedded:      (N x max(L) x E) tensor representing the embedded text

        :return:                variational mean and covariance for the latents given a sentence
        """

        # 0 where the entry of X is strictly negative, 1 everywhere else
        # (used to filter out words)
        mask = T.switch(T.lt(X, 0), 0, 1)                                       # N x max(L)

        # Add a broadcastable axis on the right of the mask (N x max(L) x 1)
        # and zero the embeddings of the filtered-out positions. A new name is
        # bound instead of using `*=` so the caller's argument is never
        # modified in place.
        masked_embedding = X_embedded * T.shape_padright(mask)                  # N x max(L) x E

        means = get_output(self.mean_nn, masked_embedding)                      # N x Z
        covs = get_output(self.cov_nn, masked_embedding)                        # N x Z

        return means, covs
Exemple #36
    def build_NADE(self, v, W, V, b, c):
        """Build the symbolic NADE outputs for the batch ``v``.

        :param v: input batch; ``v.shape[0]`` is used as the batch size.
        :param W: weights combined with ``v`` to form the per-step contributions.
        :param V: weights whose transpose is scanned over, one slice per step.
        :param b: bias added to the scanned pre-activations.
        :param c: initial accumulator; a 1-D ``c`` is tiled to one row per
            batch element.
        :return: ``(s, y, cost)``: the pre-activations, their sigmoid, and
            the cross-entropy between ``v`` and ``y`` summed over all entries
            and divided by the batch size.
        """
        def _scan_step(V_row, contribution, running):
            # First output: updated accumulator; second: this step's pre-activation.
            return (contribution + running,
                    T.dot(V_row, T.nnet.sigmoid(running.T)))

        # Outer-product style contributions, reordered so that axis 0 is the
        # one scan iterates over.
        contributions = (T.shape_padright(v) * T.shape_padleft(W)).dimshuffle(1, 0, 2)

        if c.ndim == 1:
            initial = T.dot(T.ones((v.shape[0], 1)), T.shape_padleft(c))
        else:
            initial = c

        (_, pre_activation), _ = theano.scan(
            _scan_step,
            sequences=[V.T, contributions],
            outputs_info=[initial, None])

        s = pre_activation.T + b
        y = T.nnet.sigmoid(s)

        cross_entropy = -v * T.log(y) - (1 - v) * T.log(1 - y)
        cost = cross_entropy.sum() / v.shape[0]
        return s, y, cost
Exemple #37
def my_pool_2d(input, ds, ignore_border=None, st=None, padding=(0, 0),
    # NOTE(review): the rest of the signature (the body uses `mode`) and the
    # docstring quotes are missing from this excerpt.
    This function is a patch to the maxpool op of Theano:
    contrarily to current implementation of maxpool, the gradient is backpropagated
    to only one input of a given patch if several inputs have the same value. This is
    consistent with the CuDNN implementation (and therefore the op is replaced by the
    CuDNN version when possible).

    # Pooling acts on the last two axes, so anything below 2-D is rejected.
    if input.ndim < 2:
        raise NotImplementedError('pool_2d requires a dimension >= 2')

    # ignore_border is always forced to True; an explicitly passed falsy value
    # trips the assertion instead of being silently overridden.
    if not ignore_border is None:
        # check that ignore_border is True if provided
        assert ignore_border
    ignore_border = True

    # A 4-D input is handed to the op directly, without any reshaping.
    if input.ndim == 4:
        op = MyPool(ds, ignore_border, st=st, padding=padding, mode=mode)
        output = op(input)
        return output

    # extract image dimensions
    img_shape = input.shape[-2:]

    # collapse all "leading" dimensions into one batch size, kept as a
    # length-1 vector so it can be joined with the other shape pieces
    batch_size = tensor.prod(input.shape[:-2])
    batch_size = tensor.shape_padright(batch_size, 1)

    # store as 4D tensor with shape: (batch_size,1,height,width)
    # NOTE(review): only batch_size and img_shape are joined here (3 entries)
    # while the reshape asks for ndim=4 -- the singleton entry looks missing.
    new_shape = tensor.cast(tensor.join(0, batch_size,
                                        img_shape), 'int64')
    input_4D = tensor.reshape(input, new_shape, ndim=4)

    # downsample mini-batch of images
    op = MyPool(ds, ignore_border, st=st, padding=padding, mode=mode)
    output = op(input_4D)

    # restore to original shape: original leading dims + pooled last two dims
    outshp = tensor.join(0, input.shape[:-2], output.shape[-2:])
    return tensor.reshape(output, outshp, ndim=input.ndim)
    def __init__(self,
        # NOTE(review): the parameter list is cut off in this excerpt; the body
        # uses rbm, units, b, dimensions, shared_dimensions and name.
        super(SharedBiasParameters, self).__init__(rbm, [units], name=name)
        self.var = b
        self.variables = [self.var]
        self.u = units
        self.ud = dimensions
        self.sd = shared_dimensions
        # number of dimensions that are not shared
        self.nd = self.ud - self.sd

        # term for the units: the bias with `sd` broadcastable axes appended
        self.terms[self.u] = lambda vmap: T.shape_padright(self.var, self.sd)

        # energy gradient w.r.t. the bias: mean of the units over the axes
        # returned by self._shared_axes(vmap)
        self.energy_gradients[self.var] = lambda vmap: T.mean(
            vmap[self.u], axis=self._shared_axes(vmap))
Exemple #39
    def train(self, data):
        # Builds the symbolic update for self.W and runs self.iterations
        # update steps, each on one randomly chosen row of `data`.
        data = np.asarray(data, dtype=theano.config.floatX)
        val = T.vector('val')

        # index returned by self.min_dist for the current sample
        min_idx = self.min_dist(val)

        # radius decays exponentially with the iteration counter self.it
        self.nhood_radius = self.radius * T.exp(-self.it / self.time_const)

        dists = self.in_neighbourhood(self.lattice[min_idx])

        # mask of entries closer than the current radius
        in_nhood = dists < self.nhood_radius

        # learning rate decays exponentially as well
        lr = self.start_lr * T.exp(-self.it / self.iterations)

        # move W towards `val`, scaled by influence, mask and learning rate
        updates = [(self.W, self.W + self.compute_influence(dists) *
                    T.shape_padright(in_nhood, 1) * lr * (val - self.W))]

        # NOTE(review): the remaining arguments of this call are missing from
        # this excerpt.
        epoch = theano.function(inputs=[val],

        # increments the shared iteration counter by one per call
        update_iteration = theano.function(inputs=[],
                                           updates={self.it: self.it + 1})

        self.last_W = None

        for i in range(self.iterations):
            # random_integers draws from the closed interval [0, len(data) - 1]
            index = np.random.random_integers(0, len(data) - 1)
            self.last_W = epoch(data[index])

            #index += 1
            #if index >= len(data):
            #    index = 0

            # NOTE(review): update_iteration is compiled without outputs, yet
            # its return value is bound to self.it here -- confirm intended.
            self.it = update_iteration()

            print i

            # every self.print_fr iterations
            # NOTE(review): the body of this `if` is missing from this excerpt.
            if (i + 1) % self.print_fr == 0:
Exemple #40
    def build_aligner(self):
        # Declares the symbolic inputs, builds the decoder input and compiles
        # self.align, a theano function that returns the alignments.
        tgt_action_seq = ndim_itensor(3, 'tgt_action_seq')
        tgt_action_seq_type = ndim_itensor(3, 'tgt_action_seq_type')
        tgt_node_seq = ndim_itensor(2, 'tgt_node_seq')
        tgt_par_rule_seq = ndim_itensor(2, 'tgt_par_rule_seq')
        tgt_par_t_seq = ndim_itensor(2, 'tgt_par_t_seq')

        tgt_node_embed = self.node_embedding[tgt_node_seq]
        query_tokens = ndim_itensor(2, 'query_tokens')
        query_token_embed, query_token_embed_mask = self.query_embedding(
            query_tokens, mask_zero=True)
        batch_size = tgt_action_seq.shape[0]
        max_example_action_num = tgt_action_seq.shape[1]

        # use the rule embedding where channel 0 of the action is positive,
        # otherwise the vocabulary embedding indexed by channel 1
        tgt_action_seq_embed = T.switch(T.shape_padright(tgt_action_seq[:, :, 0] > 0),
                                        self.rule_embedding_W[tgt_action_seq[:, :, 0]],
                                        self.vocab_embedding_W[tgt_action_seq[:, :, 1]])
        # NOTE(review): tensor_right_shift is defined elsewhere; presumably it
        # delays the sequence by one step -- confirm.
        tgt_action_seq_embed_tm1 = tensor_right_shift(tgt_action_seq_embed)
        # negative parent-rule ids select an all-zero embedding
        # NOTE(review): the last argument of this call is missing from this excerpt.
        tgt_par_rule_embed = T.switch(tgt_par_rule_seq[:, :, None] < 0,
                                      T.alloc(0., 1, config.rule_embed_dim),

        # disabled features are zeroed out rather than removed from the input
        if not config.frontier_node_type_feed:
            tgt_node_embed *= 0.
        if not config.parent_action_feed:
            tgt_par_rule_embed *= 0.

        decoder_input = T.concatenate(
            [tgt_action_seq_embed_tm1, tgt_node_embed, tgt_par_rule_embed], axis=-1)
        query_embed = self.query_encoder_lstm(query_token_embed, mask=query_token_embed_mask, dropout=0, srng=self.srng)

        # a step is marked when any entry along its last axis is non-zero
        tgt_action_seq_mask = T.any(tgt_action_seq_type, axis=-1)

        # NOTE(review): the remaining arguments of this call are missing from
        # this excerpt.
        alignments = self.decoder_lstm.align(decoder_input, context=query_embed,

        alignment_inputs = [query_tokens, tgt_action_seq, tgt_action_seq_type,
                            tgt_node_seq, tgt_par_rule_seq, tgt_par_t_seq]
        self.align = theano.function(alignment_inputs, [alignments])
Exemple #41
    def __init__(self, w, comp_dists, mixture_axis=-1, *args, **kwargs):
        # Stores the weights and the component distribution, derives the
        # mixture's shape, and precomputes the mode (and, for non-discrete
        # components, the mean) before calling the parent constructor.
        # NOTE(review): several closing parentheses and call arguments are
        # missing from this excerpt.
        self.w = tt.as_tensor_variable(w)
        if not isinstance(comp_dists, Distribution):
            raise TypeError(
                "The MixtureSameFamily distribution only accepts Distribution "
                f"instances as its components. Got {type(comp_dists)} instead."
        self.comp_dists = comp_dists
        # a negative axis is counted from the end of the components' shape
        if mixture_axis < 0:
            mixture_axis = len(comp_dists.shape) + mixture_axis
            if mixture_axis < 0:
                # NOTE(review): mixture_axis already had len(comp_dists.shape)
                # added above, so the value formatted below is not the axis
                # the caller passed -- confirm.
                raise ValueError(
                    "`mixture_axis` is supposed to be in shape of components' distribution. "
                    f"Got {mixture_axis + len(comp_dists.shape)} axis instead out of the bounds."
        comp_shape = to_tuple(comp_dists.shape)
        # the mixture's shape is the components' shape without the mixture axis
        self.shape = comp_shape[:mixture_axis] + comp_shape[mixture_axis + 1:]
        self.mixture_axis = mixture_axis
        kwargs.setdefault("dtype", self.comp_dists.dtype)

        # Compute the mode so we don't always have to pass a testval
        defaults = kwargs.pop("defaults", [])
        # axes that come after the mixture axis
        event_shape = self.comp_dists.shape[mixture_axis + 1:]
        # pad w on the right by the event axes and on the left by whatever is
        # still missing to reach the components' number of dimensions
        _w = tt.shape_padleft(
            tt.shape_padright(w, len(event_shape)),
            len(self.comp_dists.shape) - w.ndim - len(event_shape),
        mode = take_along_axis(
            tt.argmax(_w, keepdims=True),
        self.mode = mode[(..., 0) + (slice(None), ) * len(event_shape)]

        if not all_discrete(comp_dists):
            mean = tt.as_tensor_variable(self.comp_dists.mean)
            # weighted sum of the component means over the mixture axis
            self.mean = (_w * mean).sum(axis=mixture_axis)
            # NOTE(review): the body of this `if` is missing from this excerpt.
            if "mean" not in defaults:

        super().__init__(defaults=defaults, *args, **kwargs)
Exemple #42
def AttnDecStep(name, n_input, input_dim, hidden_dim, ctx_dim, ctx, x_t, prev_state, mode='train',weightnorm=True):
    # One decoder step: weight `ctx` using the previous hidden state, then run
    # a GRU step on the input concatenated with the weighted context.
    # NOTE(review): the lib.ops.* calls below are cut off in this excerpt.
    # h_tm1 = prev_state[:,:hidden_dim]
    # c_tm1 = prev_state[:,hidden_dim:]
    h_tm1 = prev_state
    if mode=='open-loop':
        x_t = lib.ops.Embedding(

    # repeat the previous hidden state ctx.shape[1] times along a new axis 1
    tiled_h_tm1 = T.tile(h_tm1[:,None,:],[1,ctx.shape[1],1])
    e_vec = T.nnet.relu(lib.ops.Linear(
    e_vec = T.nnet.softmax(lib.ops.Linear(
    )[:,:,0]) # (B, seq_len)

    # weighted sum of ctx over axis 1, using e_vec as the weights
    c_t = T.sum(T.shape_padright(e_vec)*ctx,axis=1)
    input_to_rnn = T.concatenate([x_t,c_t],-1)
    # state_t = LSTMStep(name,False, input_dim+ctx_dim, hidden_dim, input_to_rnn, prev_state)
    # all-ones mask, one entry per row of x_t
    mask_t = T.ones((x_t.shape[0],)).astype(theano.config.floatX)
    state_t = GRUStep(name, input_dim+ctx_dim, hidden_dim, mask_t, input_to_rnn, h_tm1)
    if mode=='open-loop':
        logits = T.nnet.softmax(lib.ops.Linear(
        idxs = T.argmax(logits,axis=-1).astype('int32')
        # NOTE(review): as written the second return is unreachable; an
        # `else:` line appears to be missing from this excerpt.
        return idxs,state_t
        return state_t,c_t
Exemple #43
    def __init__(self, n, p, *args, **kwargs):
        super(Multinomial, self).__init__(*args, **kwargs)

        # renormalise p so that it sums to one along the last axis
        p = p / tt.sum(p, axis=-1, keepdims=True)

        # NOTE(review): the `try:` and `else:` lines of this branch appear to
        # be missing from this excerpt.
        if len(self.shape) == 2:
                assert n.shape == (self.shape[0], )
            except AttributeError:
                # this occurs when n is a scalar Python int or float
                n *= tt.ones(self.shape[0])

            # n gets a trailing axis and p is made 2-D so that n * p broadcasts
            self.n = tt.shape_padright(n)
            self.p = p if p.ndim == 2 else tt.shape_padleft(p)
            self.n = n
            self.p = p

        self.mean = self.n * self.p
        # the mode is taken as the rounded mean, cast to int32
        self.mode = tt.cast(tt.round(self.mean), 'int32')
Exemple #44
    def __init__(
        self, points, values, check_sorted=True, bounds_error=False, nout=-1
        # NOTE(review): the closing `):` of the signature is missing from this
        # excerpt.
        # one entry of `points` per grid dimension
        self.ndim = len(points)
        self.nout = int(nout)

        # grid coordinates and values are stored as theano shared variables
        self.points = [theano.shared(p) for p in points]
        self.values = theano.shared(values)
        # values with exactly one axis per grid dimension get a trailing
        # broadcastable axis appended
        if self.values.ndim == self.ndim:
            self.values = tt.shape_padright(self.values)

        self.check_sorted = bool(check_sorted)
        self.bounds_error = bool(bounds_error)

        # NOTE(review): the arguments of this call are missing from this excerpt.
        self.interp_op = RegularGridOp(
Exemple #45
def grad_wrt_list(cost, wrt_list):
    """
    Compute gradient of cost wrt the variables in wrt_list.
    Return a concatenated vector of the results

    :param cost: scalar expression to differentiate.
    :param wrt_list: sequence of variables to differentiate with respect to.
    :return: ``(g_vec, g_list)`` where ``g_list`` holds one 1-D gradient per
        variable (scalar gradients are padded to length 1, higher-rank
        gradients are flattened) and ``g_vec`` is their concatenation. For an
        empty ``wrt_list`` the result is ``(T.constant(0.), [])``.
    """
    # Any empty sequence short-circuits, not only a literal empty list.
    if len(wrt_list) == 0:
        return T.constant(0.), []

    # Build a fresh list instead of overwriting entries of T.grad's result.
    g_list = []
    for g in T.grad(cost, wrt_list):
        if g.ndim < 1:
            # scalar gradient -> vector of length 1 so it can be concatenated
            g = T.shape_padright(g, n_ones=1)
        elif g.ndim > 1:
            g = T.flatten(g)
        g_list.append(g)

    g_vec = T.concatenate(g_list)

    return g_vec, g_list
Exemple #46
def Arodz(X, Y):
    """Takes in two sample sets, one from each class, and
    returns the MAP estimates of w and b

    :param X: data matrix [samples x features].
    :param Y: class labels, one per sample; reshaped to a column internally.
    :return: tuple ``(estimated_w, estimated_b)`` taken from the MAP estimate.
    """
    numberOfFeatures = len(X[0])
    Y = np.reshape(Y, (len(Y), 1))

    # instantiate an empty PyMC3 model
    basic_model = pm.Model()

    # fill the model with details:
    with basic_model:
        mu_prior_cov = 100*np.eye(numberOfFeatures)
        mu_prior_mu = np.zeros((numberOfFeatures,))
        # Priors for w,b (Gaussian priors), centered at 0, with very large std.dev.
        w = pm.MvNormal('estimated_w', mu=mu_prior_mu, cov=mu_prior_cov, shape=numberOfFeatures)
        b  = pm.Normal('estimated_b',0,100)

        # w as a column [features x 1], so that X.ww is [samples x 1] and
        # lines up with the column-shaped Y. (`ww` was previously used
        # without ever being defined.)
        ww = T.shape_padright(w, 1)

        # calculate u=w^Tx+b
        # here w, b are unknown to be estimated from data
        # X is the known data matrix [samples x features]
        u = pm.Deterministic('my_u',T.dot(X,ww) + b)
        # P(+1|x)=a(u): logistic function of u
        prob = pm.Deterministic('my_prob',1.0 / (1.0 + T.exp(-1.0*u)))
        # class +1 comes from a probability distribution with probability "prob" for +1, and 1-prob for class 0
        # here Y is the known vector of classes
        # prob is (indirectly coming from the estimate of w,b and the data x)
        Y_obs=pm.Bernoulli('Y_obs',p=prob,observed = Y)
    # done with setting up the model

    # now perform maximum a posteriori (MAP) estimation, since we have priors
    # map_estimate1 is a dictionary: "parameter name" -> "its estimated value"
    map_estimate1 = pm.find_MAP(model=basic_model)

    return map_estimate1['estimated_w'], map_estimate1['estimated_b']
Exemple #47
    def __init__(self, loss, n_units, transfer_funcs):
        n_layers = len(n_units)

        print "===== MLP ========="
        print "Number of layers:  ", n_layers
        print "Loss:              ", loss
        print "Number of units:   ", n_units
        print "Transfer function: ", transfer_funcs
        print "==================="

        # create ParameterSet
        vars = {}
        for lyr in range(n_layers):
            if lyr != 0:
                vars["weights_%d_to_%d" % (lyr - 1, lyr)] = (n_units[lyr], n_units[lyr - 1])
                vars["bias_%d" % lyr] = (n_units[lyr],)
            vars.update(self.transfer_func_parameter_shape(lyr, transfer_funcs[lyr], n_units[lyr]))
        self.ps = ParameterSet(**vars)

        # create graph
        v_input = T.fmatrix('v_input')      # v_input[unit, smpl]
        unit_val = [None for _ in range(n_layers)]
        for lyr in range(n_layers):
            if lyr == 0:
                unit_act = v_input
                unit_act = T.dot(self.weights(lyr - 1, lyr), unit_val[lyr - 1]) + T.shape_padright(self.bias(lyr))
            unit_val[lyr] = self.make_transfer_func(lyr, transfer_funcs[lyr])(unit_act)
        output = unit_val[-1]
        self.f_predict = function([self.ps.flat, v_input], output, name='f_predict')

        # calculate loss
        if loss is not None:
            v_target = T.fmatrix('v_target')    # v_target[unit, smpl]
            fit_smpl_loss = self.fit_loss(loss, transfer_funcs[-1], v_target, output)
            fit_loss = T.mean(fit_smpl_loss)
            loss = fit_loss

            dloss = T.grad(loss, self.ps.flat)
            self.f_loss = function([self.ps.flat, v_input, v_target], loss, name='f_loss')
            self.f_loss_grad = function([self.ps.flat, v_input, v_target], dloss, name='f_loss_grad')
Exemple #48
def pool_1d_Lasagne(x, axis=1, mode='max'):
    # NOTE(review): the docstring quotes are missing from this excerpt; the
    # next three lines are the docstring text.
    Lasagne requires x is 3D, and pooling is done on the last dimension
    :param x:
    :param axis:
    # append a trailing broadcastable axis so the input gains one dimension
    input_4d = tensor.shape_padright(x, 1)
    # for axis == 1, swap axes 1 and 2 before pooling
    if axis == 1:
        input_4d = input_4d.dimshuffle((0, 2, 1, 3))
    # NOTE(review): the leading arguments and the closing parenthesis of this
    # call appear to be missing from this excerpt.
    pooled = pool_2d(
        ws=(2, 1),
        stride=(2, 1),
        pad=(0, 0),
    # swap axes 1 and 2 back
    if axis == 1:  # [DV] add support for 'axis' para
        pooled = pooled.dimshuffle((0, 2, 1, 3))
    # drop the trailing axis that was added above
    return pooled[:, :, :, 0]
Exemple #49
    def compute_weighted_averages(self, weights, attended):
        """Compute weighted averages of the attended sequence vectors.

        Parameters
        ----------
        weights : :class:`~theano.Variable`
            The weights. The shape must be equal to the attended shape
            without the last dimension.
        attended : :class:`~theano.Variable`
            The attended. The index in the sequence must be the first
            dimension.

        Returns
        -------
        weighted_averages : :class:`~theano.Variable`
            The weighted averages of the attended elements. The shape
            is equal to the attended shape with the first dimension
            dropped.

        """
        # A trailing broadcastable axis lets each weight scale a whole
        # attended vector; the sum then runs over the first (sequence) axis.
        return (tensor.shape_padright(weights) * attended).sum(axis=0)
Exemple #50
    def concatenate_basic(self, query_token_embed, query_tokens_phrase,
                          query_tokens_pos, query_tokens_canon_id):
        # helper that appends one trailing broadcastable axis to a tensor
        transform = lambda tokens: T.shape_padright(tokens)

        # concatenate query_token_embed with query_tokens_phrase and query_tokens_pos,
        # essentially expanding the embedding to incorporate the new data
        # NOTE(review): both concatenate calls are cut off and the `else:`
        # line appears to be missing from this excerpt.
        if config.include_cid == True:
            return T.concatenate([
            return T.concatenate([
Exemple #51
def conv2d_same(input, filters, input_shape=(None, None, None, None), filter_shape=(None, None, None, None),
    # NOTE(review): the rest of the signature (the body uses `padding`), two
    # `else:` lines and the tail of one call are missing from this excerpt.
    # Both tensors must be 4-D and the filter's last two sizes must be odd.
    assert input.ndim == 4 and filters.ndim == 4
    assert (4 == len(input_shape)) and (4 == len(filter_shape))
    assert (1 == filter_shape[2] % 2) and (1 == filter_shape[3] % 2)
    # 1x1 filters on 1x1 inputs, or 1x1 filters on the CPU, are handled by
    # tensor4dot instead of a convolution.
    if (tuple(input_shape[2:4]) == (1, 1) and tuple(filter_shape[2:4]) == (1, 1)) or (
                    tuple(filter_shape[2:4]) == (1, 1) and theano.config.device == "cpu"):
        return tensor4dot(input, filters)
        # window of the padded tensor that will hold the original input
        # NOTE(review): `/` is used for these offsets; confirm it yields
        # integers for the shapes involved.
        new_row_begin = filters.shape[2] / 2
        new_row_end = input.shape[2] + filters.shape[2] / 2
        new_col_begin = filters.shape[3] / 2
        new_col_end = input.shape[3] + filters.shape[3] / 2
        if padding is not None:
            assert 1 == padding.ndim
            # padded tensor grows by (filter size - 1) along the last two axes
            padded_input = TT.ones((
                input.shape[0], input.shape[1], input.shape[2] + filters.shape[2] - 1,
                input.shape[3] + filters.shape[3] - 1)).astype(theano.config.floatX)
            padded_input = TT.set_subtensor(padded_input[:, :, new_row_begin:new_row_end, new_col_begin:new_col_end],
            # reshape the 1-D padding to (1, n, 1, 1) so it broadcasts over the
            # other three axes
            padding = TT.shape_padleft(TT.shape_padright(padding, 2), 1)
            padded_input = padding * padded_input
            padded_input = TT.zeros((
                input.shape[0], input.shape[1], input.shape[2] + filters.shape[2] - 1,
                input.shape[3] + filters.shape[3] - 1)).astype(theano.config.floatX)
        # add the original input into the central window
        padded_input = TT.inc_subtensor(padded_input[:, :, new_row_begin:new_row_end, new_col_begin:new_col_end], input)
        # static shape of the padded input; entries stay None when unknown
        new_input_shape = [None, None, None, None]
        if input_shape[0] is not None:
            new_input_shape[0] = input_shape[0]
        if input_shape[1] is not None:
            new_input_shape[1] = input_shape[1]
        if input_shape[2] is not None and filter_shape[2] is not None:
            new_input_shape[2] = input_shape[2] + filter_shape[2] - 1
        if input_shape[3] is not None and filter_shape[3] is not None:
            new_input_shape[3] = input_shape[3] + filter_shape[3] - 1
        # 'valid' convolution over the padded input
        ret = TT.nnet.conv2d(input=padded_input, filters=filters, border_mode='valid',
                             image_shape=tuple(new_input_shape), filter_shape=filter_shape)

        return ret
Exemple #52
            def theano_code(y, x, xs, ys, zs, xo, yo, ro):
                # NOTE(review): several calls below are cut off in this
                # excerpt (missing arguments and closing parentheses).

                # Get the z coord
                z = tt.sqrt(1 - x**2 - y**2)

                # Compute the intensity
                pT = self.map_ref.ops.pT(x, y, z)

                # Dot the polynomial into the basis
                # (a trailing broadcastable axis is appended to the result)
                intensity = tt.shape_padright(tt.dot(pT, self._A1y))

                # Weight the intensity by the illumination
                # (x, y, z are each reshaped to a row and stacked)
                xyz = tt.concatenate((
                    tt.reshape(x, [1, -1]),
                    tt.reshape(y, [1, -1]),
                    tt.reshape(z, [1, -1]),
                I = self.map_ref.ops.compute_illumination_point_source(
                # NaN intensities are passed through unweighted
                intensity = tt.switch(tt.isnan(intensity), intensity,
                                      intensity * I)[0, 0]

                # Check if the point is visible
                # (first condition: inside the circle of radius ro centred at
                # (xo, yo); second: outside the unit disk)
                result = ifelse(
                    ((x - xo)**2 + (y - yo)**2 < ro**2)[0],
                        (x**2 + y**2 > 1)[0],
                return result
Exemple #53
    def step(self, ipt, state, state_strength, dropout_masks=None):
        # NOTE(review): the docstring quotes and the tails of three do_layer
        # calls are missing from this excerpt.
        Perform a single step of the network

            ipt: The current input. Should be an int tensor of shape (n_batch, self.input_width)
            state: The previous state. Should be a float tensor of shape (n_batch, self.output_width)
            state_strength: Strength of the previous state. Should be a float tensor of shape
            dropout_masks: Masks from get_dropout_masks

        Returns: The next output state, and the next output strength
        # apply the dropout masks to the input and the state, when given
        if dropout_masks is not None:
            ipt_masks, state_masks = dropout_masks
            ipt = ipt * ipt_masks
            state = state * state_masks

        # the state as seen by the gates: scaled by its strength
        obs_state = state * T.shape_padright(state_strength)
        cat_ipt_state = T.concatenate([ipt, obs_state], 1)
        reset = do_layer(T.nnet.sigmoid, cat_ipt_state, self._reset_W,
        update = do_layer(T.nnet.sigmoid, cat_ipt_state, self._update_W,
        # the last column of the update gate is for the strength, the others
        # are for the state
        update_state = update[:, :-1]
        update_strength = update[:, -1]

        cat_reset_ipt_state = T.concatenate([ipt, (reset * obs_state)], 1)
        candidate_act = do_layer(T.tanh, cat_reset_ipt_state,
                                 self._activation_W, self._activation_b)
        candidate_strength = do_layer(
            T.nnet.sigmoid, cat_reset_ipt_state, self._strength_W,

        # blend the previous values with the candidates using the update gate
        newstate = update_state * state + (1 - update_state) * candidate_act
        newstrength = update_strength * state_strength + (
            1 - update_strength) * candidate_strength

        return newstate, newstrength
Exemple #54
    def get_relative_position(self, t, light_delay=False):
        """The planets' positions relative to the star

        Args:
            t: The times where the position should be evaluated.
            light_delay: Not supported for this orbit type; any truthy value
                raises.

        Returns:
            The components ``(x, y, z)`` of the position vector at ``t``.

        Raises:
            NotImplementedError: If ``light_delay`` is truthy.

        """
        if light_delay:
            raise NotImplementedError(
                "Light travel time delay is not implemented for simple orbits"
            )
        # Time since the reference time, wrapped into one period and then
        # shifted by half a period.
        dt = tt.mod(tt.shape_padright(t) - self._ref_time, self.period)
        dt -= self._half_period
        x = tt.squeeze(self.speed * dt)
        # y is constant; zeros_like(dt) only gives it dt's shape.
        y = tt.squeeze(self._b_norm + tt.zeros_like(dt))
        # z is +1 while |dt| is within half the duration and -1 otherwise.
        m = tt.abs_(dt) < 0.5 * self.duration
        z = tt.squeeze(m * 1.0 - (~m) * 1.0)
        return x, y, z
Exemple #55
    def __init__(self, theta, alpha, kappa, gamma=0, sigma=1, *args, **kwargs):
        super(GRMLike, self).__init__(*args, **kwargs)

        self.param_list = []
        # NOTE(review): the body of this loop is missing from this excerpt;
        # presumably it fills self.param_list -- confirm.
        for var in [theta, alpha, kappa, gamma, sigma]:

        # name -> parameter, pairing the names with self.param_list in order
        par_names = ['theta', 'alpha', 'kappa', 'gamma', 'sigma']
        self.params = {nm: var for nm, var in zip(par_names, self.param_list)}

        self.cprobst, self.probst = self.__init_probs()

        # Set number of categories
        self.k = tt.shape(self.probst)[-1]

        # Compute mode for each response category
        self.mode = tt.argmax(self.probst, axis=-1)

        # Numpy fancy indexing to allow observed data to index
        # probability tensor
        # NOTE(review): the rest of this tuple is missing from this excerpt.
        self.index = (tt.shape_padright(tt.arange(self.probst.shape[0])),
 def __call__(self, x):
     # NOTE(review): several conv2d calls below are cut off in this excerpt
     # (missing arguments and closing parentheses).
     # residual updates that come before the first layer
     for k1, k2 in self.res_kernels[0]:
         x += T.nnet.conv2d(relu(T.nnet.conv2d(x, k1, border_mode='half')),
     for i, (k, b) in enumerate(zip(self.layer_kernels, self.layer_biases)):
         # relu is applied between layers, not before the first one
         if i > 0: x = relu(x)
         if self.upscale > 1:
             # repeat along axes 2 and 3, dropping the last element of each
             x = x.repeat(self.upscale,
                          axis=2)[:, :, :-1].repeat(self.upscale,
                                                    axis=3)[:, :, :, :-1]
         # the bias gets two trailing broadcastable axes before being added
         x = T.nnet.conv2d(x,
                           subsample=(self.downscale, self.downscale),
                           border_mode='half') + T.shape_padright(b, 2)
         # residual updates that follow layer i
         for k1, k2 in self.res_kernels[i + 1]:
             x += T.nnet.conv2d(relu(
                 T.nnet.conv2d(x, k1, border_mode='half')),
     # NOTE(review): any other value of self.output falls through and returns None.
     if self.output == 'linear': return x
     elif self.output == 'relu': return relu(x)
Exemple #57
def get_light_curve(time, tpeaks, fwhms, ampls, texp=None, oversample=7):
    """Evaluate the multi-flare model on ``time``, optionally exposure-averaged.

    Args:
        time: Sample times; must provide ``astype`` (e.g. a NumPy array). It
            is cast to float64 and wrapped as a tensor variable.
        tpeaks, fwhms, ampls: Flare parameters, passed through unchanged to
            ``multiflaremodel``.
        texp: Exposure time. When ``None`` the model is evaluated directly at
            ``time``. Otherwise each time stamp is replaced by ``oversample``
            evenly spaced points spanning ``[-texp / 2, texp / 2]`` around it
            and the model is averaged over those points.
        oversample: Number of sub-samples per exposure. Coerced to ``int``
            and bumped to the next odd number if even, so that the grid
            always contains the exposure mid-point.

    Returns:
        The model light curve as a tensor expression, one value per entry of
        ``time``.
    """
    time = time.astype("float64")
    time = tt.as_tensor_variable(time)

    if texp is None:
        tgrid = time
    else:
        # taking this oversample code from
        # https://github.com/dfm/exoplanet
        # and https://github.com/lkreidberg/batman
        oversample = int(oversample)
        oversample += 1 - oversample % 2  # force an odd sample count
        dt = np.linspace(-texp / 2.0, texp / 2.0, oversample)
        # (n_time, 1) + (oversample,) -> (n_time, oversample)
        tgrid = tt.shape_padright(time) + dt

    multiflare_lc = multiflaremodel(tgrid, tpeaks, fwhms, ampls)

    if texp is not None:
        # Collapse the sub-samples of each exposure back to a single value.
        multiflare_lc = tt.mean(
            tt.reshape(multiflare_lc, (-1, oversample)), axis=1
        )
    return multiflare_lc
Exemple #58
    def __init__(self, input, image_shape, pool_size, sparse_count):
        """Build a sparse (dilated) 3-D average filter applied via ``conv3d``.

        The averaging window has ``pool_size[i]`` taps along each of the last
        three axes, with ``sparse_count`` zeros between consecutive taps. It
        is applied as a ``'valid'`` convolution, so every output axis shrinks
        by ``len(taps) - 1``.

        Args:
            input: Symbolic input whose last three axes are pooled. The final
                reshape uses ``ndim=5`` with ``input.shape[:-3]`` as leading
                axes, so a 5-D input is expected.
            image_shape: Static shape of ``input`` (5 entries). The first two
                entries are merged into a single batch axis for the
                convolution.
            pool_size: Number of taps along each of the three pooled axes.
            sparse_count: Number of zeros inserted between consecutive taps.

        Attributes set:
            pool_mask: ``(1, 1, lx, ly, lz)`` averaging kernel in ``floatX``;
                its non-zero taps are ``1 / prod(pool_size)``.
            outputlen: Static size of the three pooled axes after pooling.
            output: Symbolic pooled tensor with the leading axes of ``input``
                restored.
        """
        # not implementing max pooling as of now. have to do with average pooling
        oneZeros = numpy.concatenate(([1], numpy.zeros(sparse_count)))

        def _dilated_taps(n_taps):
            # (n_taps - 1) copies of [1, 0, ..., 0] followed by a final 1:
            # n_taps ones separated by `sparse_count` zeros.
            return numpy.insert(numpy.tile(oneZeros, n_taps - 1),
                                (n_taps - 1) * (len(oneZeros)), 1)

        x = _dilated_taps(pool_size[0])
        y = _dilated_taps(pool_size[1])
        z = _dilated_taps(pool_size[2])

        # Separable 3-D mask: outer product of the three 1-D tap patterns.
        mask = numpy.outer(numpy.outer(x, y),
                           z).reshape(len(x), len(y), len(z))
        mask = numpy.ones((1, 1, len(x), len(y), len(z))) * mask
        self.pool_mask = mask.astype(
            theano.config.floatX) / numpy.prod(pool_size)

        # Collapse all leading axes into one batch axis and insert a singleton
        # second axis, giving the 5-D layout (batch, 1, d0, d1, d2). The
        # singleton entry is required: without it `new_shape` has only four
        # entries and the ndim=5 reshape below cannot succeed.
        frame_shape = input.shape[-3:]
        batch_size = T.shape_padright(T.prod(input.shape[:-3]), 1)
        new_shape = T.cast(
            T.join(0, batch_size, T.as_tensor([1]), frame_shape), 'int32')
        filter_shape = (1, 1, len(x), len(y), len(z))
        input_5d = T.reshape(input, new_shape, ndim=5)
        # Static counterpart of `new_shape`.
        image_shape = (image_shape[0] * image_shape[1], 1, image_shape[2],
                       image_shape[3], image_shape[4])
        # Axes 1 and 2 are swapped for the conv3d call and swapped back on
        # the result.
        avg_out = conv3d(
            signals=input_5d.dimshuffle([0, 2, 1, 3, 4]),
            filters=self.pool_mask.transpose(0, 2, 1, 3, 4),
            signals_shape=[image_shape[i] for i in [0, 2, 1, 3, 4]],
            filters_shape=[filter_shape[i] for i in [0, 2, 1, 3, 4]],
            border_mode='valid').dimshuffle([0, 2, 1, 3, 4])
        # Restore the original leading axes in front of the pooled axes.
        outshp = T.join(0, input.shape[:-3], avg_out.shape[-3:])
        avg_out = T.reshape(avg_out, outshp, ndim=5)

        # 'valid' convolution output size along each pooled axis.
        self.outputlen = (image_shape[2] - len(x) + 1,
                          image_shape[3] - len(y) + 1,
                          image_shape[4] - len(z) + 1)
        self.output = avg_out
Exemple #59
def _apply_index(data, indices, axis=0):
    """Index ``data`` along a single axis.

    ``indices`` is a tensor of indices shaped like ``data`` minus the given
    axis. The result is a slice of ``data`` with the given axis removed: for
    each entry of the other dimensions, the given index for that axis is used
    to select the single item.

    ``_apply_index`` can be used to dereference the tensor search results
    returned from ``tensor.argmax()``.

    Args:
        data: Symbolic tensor to index.
        indices: Symbolic integer tensor with at most ``data.ndim - 1``
            dimensions; missing trailing dimensions are padded with
            broadcastable axes.
        axis: Axis of ``data`` that ``indices`` selects from.

    Returns:
        ``data`` indexed with one index tensor per axis (advanced indexing).
    """
    ndim = data.type.ndim
    shape = data.shape
    if indices.type.ndim < ndim - 1:
        # Pad on the right so `indices` has exactly ndim - 1 dimensions.
        indices = tensor.shape_padright(indices,
                                        n_ones=ndim - indices.type.ndim - 1)

    selectors = []
    for a in range(ndim):
        if a == axis:
            selectors.append(indices)
        elif a < axis:
            selectors.append(_axis_count(shape, a, ndim - 1))
        else:
            # Axes after `axis` move down by one position in the result,
            # because `axis` itself is removed.
            selectors.append(_axis_count(shape, a - 1, ndim - 1))
    return data[tuple(selectors)]
Exemple #60
    def __call__(self, target, context, mask=None):
        """Decode ``target`` conditioned on ``context``; return token probabilities.

        The target sequence is masked and shifted one step to the right (a
        zero vector is fed at the first step), run through a GRU-style
        recurrence whose gates also receive the context vector, and projected
        to a softmax over the last axis.

        Args:
            target: Target sequence, indexed as ``(batch, time, input_dim)``
                by the slicing below.
            context: Context tensor, shuffled below as a 2-D
                ``(batch, context_dim)`` matrix and broadcast over time.
            mask: Per-token mask for ``target``. Although the default is
                ``None``, it is passed to ``T.shape_padright`` unconditionally,
                so callers are expected to supply a real mask.

        Returns:
            Softmax probabilities with the same three leading dimensions as
            the pre-softmax scores, i.e. ``(batch, time, vocab)``.

        NOTE(review): two pieces were missing in the copy of the file this was
        restored from: the closing ``], axis=1)`` of the concatenation and the
        ``outputs_info=T.unbroadcast(alloc_zeros_matrix(`` prefix of the scan
        call. Both were reconstructed from the surviving fragments and the
        tensor shapes -- confirm against the upstream source.
        """
        target = target * T.cast(T.shape_padright(mask), 'float32')
        padded_mask = self.get_padded_shuffled_mask(mask, pad=1)

        # Shift right along the time axis: step t sees the target of step
        # t - 1, and step 0 sees zeros.
        X_shifted = T.concatenate([
            alloc_zeros_matrix(target.shape[0], 1, self.input_dim),
            target[:, 0:-1, :]
        ], axis=1)

        # Time-major layout for scan: (time, batch, input_dim).
        X = X_shifted.dimshuffle((1, 0, 2))

        # Leading broadcast axis so the same context is added at every step.
        ctx_step = context.dimshuffle(('x', 0, 1))
        x_z = T.dot(X, self.W_z) + T.dot(ctx_step, self.C_z) + self.b_z
        x_r = T.dot(X, self.W_r) + T.dot(ctx_step, self.C_r) + self.b_r
        x_h = T.dot(X, self.W_h) + T.dot(ctx_step, self.C_h) + self.b_h

        # Initial hidden state is all zeros, one row per batch element; axis 1
        # is un-broadcast so its type matches the step function's output.
        h, _ = theano.scan(self._step,
                           sequences=[x_z, x_r, x_h, padded_mask],
                           outputs_info=T.unbroadcast(
                               alloc_zeros_matrix(
                                   X.shape[1], self.hidden_dim), 1),
                           non_sequences=[self.U_z, self.U_r, self.U_h])

        # (batch_size, max_token_len, hidden_dim)
        h = h.dimshuffle((1, 0, 2))

        # (batch_size, max_token_len, vocab_size)
        predicts = T.dot(h, self.U_y) + T.dot(context.dimshuffle(
            (0, 'x', 1)), self.C_y) + self.b_y

        # softmax works on 2-D input: flatten batch and time, then restore.
        predicts_flatten = predicts.reshape((-1, predicts.shape[2]))
        return T.nnet.softmax(predicts_flatten).reshape(
            (predicts.shape[0], predicts.shape[1], predicts.shape[2]))