Example #1
def masked_softmax2(x, mask=None):
    """
    softmax over axis=1
    deal with case where mask may be all 0
    :param x: 
    :param mask: 
    :return: 
    """
    if x.ndim not in (1, 2) \
            or x.dtype not in T.float_dtypes:
        raise ValueError('x must be 1-d or 2-d tensor of floats. Got %s with ndim=%d' %
                         (x.type, x.ndim))
    if mask is not None and mask.ndim != x.ndim:
        raise ValueError('mask must have the same ndim as x. Got x=%d-d and mask=%d-d' %
                         (x.ndim, mask.ndim))
    if x.ndim == 1:
        x = T.shape_padleft(x, n_ones=1)

    if mask is not None and mask.ndim == 1:
        mask = T.shape_padleft(mask, n_ones=1)

    e_x = T.exp(x - x.max(axis=1)[:, None])
    if mask is not None:
        e_x *= mask
    # avoid division by zero when a row of the mask is all zeros
    if mask is not None:
        denorm = e_x.sum(axis=1) + 1.0 - T.max(mask, axis=1)
    else:
        denorm = e_x.sum(axis=1)
    sm = e_x / denorm[:, None]
    return sm
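A minimal usage sketch (not from any of the listed projects) showing how masked_softmax2 might be compiled and called; it assumes `import numpy as np`, `import theano`, and `import theano.tensor as T` at module level.

x = T.matrix('x')
mask = T.matrix('mask')
softmax_fn = theano.function([x, mask], masked_softmax2(x, mask))

probs = softmax_fn(
    np.random.randn(2, 4).astype(theano.config.floatX),
    np.array([[1, 1, 0, 0],
              [0, 0, 0, 0]], dtype=theano.config.floatX))
# Row 0 sums to 1 over its two unmasked entries; row 1 (all-zero mask) comes
# out as all zeros instead of NaN, thanks to the "+ 1 - max(mask)" guard.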
Example #2
def masked_softmax(x, mask=None):
    """
    softmax over axis=1
    there must be at least one 1 in mask
    :param x: 
    :param mask: 
    :return: 
    """
    if x.ndim not in (1, 2) \
            or x.dtype not in T.float_dtypes:
        raise ValueError('x must be 1-d or 2-d tensor of floats. Got %s with ndim=%d' % (x.type, x.ndim))
    if mask is not None and mask.ndim != x.ndim:
        raise ValueError('mask must have the same ndim as x. Got x.ndim=%d and mask.ndim=%d' % (x.ndim, mask.ndim))
    if x.ndim == 1:
        x = T.shape_padleft(x, n_ones=1)

    if mask is not None and mask.ndim == 1:
        mask = T.shape_padleft(mask, n_ones=1)

    e_x = T.exp(x - x.max(axis=1)[:, None])
    if mask is not None:
        e_x *= mask

    sm = e_x / e_x.sum(axis=1)[:, None]
    return sm
Example #3
def bn_ff(x,
          mean,
          var,
          gamma=1.,
          beta=0.,
          prefix="",
          axis=0,
          use_popstats=False):
    assert x.ndim == 2
    if not use_popstats:
        mean = x.mean(axis=axis)
        var = x.var(axis=axis)
        mean.tag.bn_statistic = True
        mean.tag.bn_label = prefix + "_mean"
        var.tag.bn_statistic = True
        var.tag.bn_label = prefix + "_var"

    var_corrected = var + 1e-7
    y = theano.tensor.nnet.bn.batch_normalization(inputs=x,
                                                  gamma=gamma,
                                                  beta=beta,
                                                  mean=T.shape_padleft(mean),
                                                  std=T.shape_padleft(
                                                      T.sqrt(var_corrected)))
    assert y.ndim == 2
    return y, mean, var
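A small illustrative sketch (mine, assuming `import numpy as np`, `import theano`, and `import theano.tensor as T`) of why the statistics are wrapped in T.shape_padleft before being passed to batch_normalization: the per-feature mean of shape (H,) becomes a (1, H) row that broadcasts across the batch axis of the (B, H) input.

x = T.matrix('x')                      # (B, H)
feat_mean = x.mean(axis=0)             # (H,)
row_mean = T.shape_padleft(feat_mean)  # (1, H), same as feat_mean.dimshuffle('x', 0)
centered = x - row_mean                # broadcasts over the batch axis

center_fn = theano.function([x], centered)
out = center_fn(np.arange(6).reshape(3, 2).astype(theano.config.floatX))
# every column of `out` now has zero mean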
Example #4
 def create_prediction(self):  # method that runs one prediction pass
     gfs=self.gfs
     pm25in=self.pm25in
     # initial forward pass for the first step
     x=T.concatenate([gfs[:,0],gfs[:,1],gfs[:,2],pm25in[:,0],pm25in[:,1],self.cnt[:,:,0]],axis=1)
     if self.celltype==RNN:
         init_hiddens = [(T.repeat(T.shape_padleft(create_shared(layer.hidden_size, name="RNN.initial_hidden_state")),
                                   x.shape[0], axis=0)
                          if x.ndim > 1 else create_shared(layer.hidden_size, name="RNN.initial_hidden_state"))
                         if hasattr(layer, 'initial_hidden_state') else None
                         for layer in self.model.layers]
     if self.celltype==LSTM:
         init_hiddens = [(T.repeat(T.shape_padleft(create_shared(layer.hidden_size * 2, name="LSTM.initial_hidden_state")),
                                   x.shape[0], axis=0)
                          if x.ndim > 1 else create_shared(layer.hidden_size * 2, name="LSTM.initial_hidden_state"))
                         if hasattr(layer, 'initial_hidden_state') else None
                         for layer in self.model.layers]
     self.layerstatus=self.model.forward(x,init_hiddens)
     #results.shape?40*1
     self.results=self.layerstatus[-1]
     if self.steps > 1:
         self.layerstatus=self.model.forward(T.concatenate([gfs[:,1],gfs[:,2],gfs[:,3],pm25in[:,1],self.results,self.cnt[:,:,1]],axis=1),self.layerstatus)
         self.results=T.concatenate([self.results,self.layerstatus[-1]],axis=1)      
         # run the forward pass for the remaining steps-2 steps
         for i in xrange(2,self.steps):
             self.layerstatus=self.model.forward(T.concatenate([gfs[:,i],gfs[:,i+1],gfs[:,i+2],T.shape_padright(self.results[:,i-2]),T.shape_padright(self.results[:,i-1]),self.cnt[:,:,i]],axis=1),self.layerstatus)
             #need T.shape_padright???
             self.results=T.concatenate([self.results,self.layerstatus[-1]],axis=1)
     return self.results
Example #5
    def sym_mask_logdensity_estimator_intermediate(self, x, mask):
        non_linearity_name = self.parameters["nonlinearity"].get_name()
        assert non_linearity_name == "sigmoid" or non_linearity_name == "RLU"
        x = x.T  # BxD
        mask = mask.T  # BxD
        output_mask = constantX(1) - mask  # BxD
        D = constantX(self.n_visible)
        d = mask.sum(1)  # d is the 1-based index of the dimension whose value to infer (not the size of the context)
        masked_input = x * mask  # BxD
        h = self.nonlinearity(T.dot(masked_input, self.W1) + T.dot(mask, self.Wflags) + self.b1)  # BxH
        for l in xrange(self.n_layers - 1):
            h = self.nonlinearity(T.dot(h, self.Ws[l]) + self.bs[l])  # BxH
        z_alpha = T.tensordot(h, self.V_alpha, [[1], [1]]) + T.shape_padleft(self.b_alpha)
        z_mu = T.tensordot(h, self.V_mu, [[1], [1]]) + T.shape_padleft(self.b_mu)
        z_sigma = T.tensordot(h, self.V_sigma, [[1], [1]]) + T.shape_padleft(self.b_sigma)
        temp = T.exp(z_alpha)  # + 1e-6
        # temp += T.shape_padright(temp.sum(2)/1e-3)
        Alpha = temp / T.shape_padright(temp.sum(2))  # BxDxC
        Mu = z_mu  # BxDxC
        Sigma = T.exp(z_sigma)  # + 1e-6 #BxDxC

        # Alpha = Alpha * T.shape_padright(output_mask) + T.shape_padright(mask)
        # Mu = Mu * T.shape_padright(output_mask)
        # Sigma = Sigma * T.shape_padright(output_mask) + T.shape_padright(mask)
        # Phi = -constantX(0.5) * T.sqr((Mu - T.shape_padright(x*output_mask)) / Sigma) - T.log(Sigma) - constantX(0.5 * np.log(2*np.pi)) #BxDxC

        Phi = (
            -constantX(0.5) * T.sqr((Mu - T.shape_padright(x)) / Sigma)
            - T.log(Sigma)
            - constantX(0.5 * np.log(2 * np.pi))
        )  # BxDxC
        logdensity = (log_sum_exp(Phi + T.log(Alpha), axis=2) * output_mask).sum(1) * D / (D - d)
        return (logdensity, z_alpha, z_mu, z_sigma, Alpha, Mu, Sigma, h)
Example #6
def generic_compute_Lx_batches(samples, weights, biases, bs, cbs):
    tsamples = [x.reshape((bs//cbs, cbs, x.shape[1])) for x in samples]
    final_ws = [T.unbroadcast(T.shape_padleft(T.zeros_like(x)),0)
                for x in weights]
    final_bs = [T.unbroadcast(T.shape_padleft(T.zeros_like(x)),0)
                for x in biases]
    n_samples = len(samples)
    n_weights = len(weights)
    n_biases = len(biases)
    def comp_step(*args):
        lsamples = args[:n_samples]
        terms1 = generic_compute_Lx_term1(lsamples, weights, biases)
        rval = []
        for (term1, acc) in zip(terms1, args[n_samples:]):
            rval += [acc + term1]
        return rval

    rvals,_ = theano.sandbox.scan.scan(
        comp_step,
        sequences=tsamples,
        states=final_ws + final_bs,
        n_steps=bs // cbs,
        profile=0,
        mode=theano.Mode(linker='cvm_nogc'),
        flags=['no_optimization'] )
    accs1 = [x[0]/numpy.float32(bs//cbs) for x in rvals]
    accs2 = generic_compute_Lx_term2(samples,weights,biases)
    return [x - y for x, y in zip(accs1, accs2)]
Example #7
    def pos_phase(self, v, init_state, n_steps=1, eps=1e-3):
        """
        Mixed mean-field + sampling inference in positive phase.
        :param v: input being conditioned on
        :param init_state: dictionary of initial values
        :param n_steps: number of Gibbs updates to perform afterwards.
        """
        def pos_mf_iteration(g1, h1, v, pos_counter):
            h2 = self.h_hat(g1, v)
            s2_1 = self.s1_hat(g1, v)
            s2_0 = self.s0_hat(g1, v)
            g2 = self.g_hat(h2, s2_1, s2_0)
            # stopping criterion
            dl_dghat = T.max(abs(self.dlbound_dg(g2, h2, s2_1, s2_0, v)))
            dl_dhhat = T.max(abs(self.dlbound_dh(g2, h2, s2_1, s2_0, v)))
            stop = T.maximum(dl_dghat, dl_dhhat)
            return [g2, h2, s2_1, s2_0, v, pos_counter + 1], theano.scan_module.until(stop < eps)

        states = [T.unbroadcast(T.shape_padleft(init_state['g'])),
                  T.unbroadcast(T.shape_padleft(init_state['h'])),
                  {'steps': 1},
                  {'steps': 1},
                  T.unbroadcast(T.shape_padleft(v)),
                  T.unbroadcast(T.shape_padleft(0.))]

        rvals, updates = scan(
                pos_mf_iteration,
                states = states,
                n_steps=n_steps)

        return [rval[0] for rval in rvals]
Example #8
def compute_Lx_batches(v, g, h, xw_mat, xv_mat, xa, xb, xc, bs, cbs):
    xw = xw_mat.flatten()
    xv = xv_mat.flatten()
    tv = v.reshape((bs // cbs, cbs, v.shape[1]))
    tg = g.reshape((bs // cbs, cbs, g.shape[1]))
    th = h.reshape((bs // cbs, cbs, h.shape[1]))

    final_w1 = T.unbroadcast(T.shape_padleft(T.zeros_like(xw_mat)),0)
    final_v1 = T.unbroadcast(T.shape_padleft(T.zeros_like(xv_mat)),0)
    final_a1 = T.unbroadcast(T.shape_padleft(T.zeros_like(xa)),0)
    final_b1 = T.unbroadcast(T.shape_padleft(T.zeros_like(xb)),0)
    final_c1 = T.unbroadcast(T.shape_padleft(T.zeros_like(xc)),0)
    def comp_step(lv, lg, lh,
                  acc_w1, acc_v1, acc_a1, acc_b1, acc_c1):
        terms1 = compute_Lx_term1(lv, lg, lh, xw, xv, xa, xb, xc)
        accs1 = [acc_w1, acc_v1, acc_a1, acc_b1, acc_c1]
        rval = []

        for (term1, acc) in zip(terms1,accs1):
            rval += [acc + term1]
        return rval
    rvals,_ = theano.sandbox.scan.scan(
        comp_step,
        sequences=[tv,tg,th],
        states=[
            final_w1, final_v1, final_a1, final_b1, final_c1],
        n_steps=bs // cbs,
        profile=0,
        mode=theano.Mode(linker='cvm_nogc'),
        flags=['no_optimization'] )
    accs1 = [x[0]/numpy.float32(bs//cbs) for x in rvals]
    accs2 = compute_Lx_term2(v,g,h,xw,xv,xa,xb,xc)
    return [x - y for x, y in zip(accs1, accs2)]
Example #9
def bn(x, gammas, betas, mean, var, args):
    assert mean.ndim == 1
    assert var.ndim == 1
    assert x.ndim == 2
    if not args.use_population_statistics:
        mean = x.mean(axis=0)
        var = x.var(axis=0)
    #var = T.maximum(var, args.epsilon)
    #var = var + args.epsilon

    if args.baseline:
        y = x + betas
    else:
        var_corrected = var + args.epsilon

        y = theano.tensor.nnet.bn.batch_normalization(
            inputs=x,
            gamma=gammas,
            beta=betas,
            mean=T.shape_padleft(mean),
            std=T.shape_padleft(T.sqrt(var_corrected)),
            mode="high_mem")
    assert mean.ndim == 1
    assert var.ndim == 1
    return y, mean, var
Example #10
def NVIL(discriminator, log_Z, g_output_logit, n_samples, trng, batch_size=64):
    R = trng.uniform(size=(n_samples, batch_size, DIM_C, DIM_X, DIM_Y),
                     dtype=floatX_)
    g_output = T.nnet.sigmoid(g_output_logit)
    samples = (R <= T.shape_padleft(g_output)).astype(floatX_)

    D_r = lasagne.layers.get_output(discriminator)
    D_f = lasagne.layers.get_output(discriminator,
                                    samples.reshape((-1, DIM_C, DIM_X, DIM_Y)))
    D_f_ = D_f.reshape((n_samples, batch_size))
    log_w = D_f_
    log_g = -(
        (1. - samples) * T.shape_padleft(g_output_logit) +
        T.shape_padleft(T.nnet.softplus(-g_output_logit))).sum(axis=(2, 3, 4))

    log_N = T.log(log_w.shape[0]).astype(floatX_)
    log_Z_est = log_sum_exp(log_w - log_N, axis=0)
    log_w_tilde = log_w - T.shape_padleft(log_Z_est) - log_N
    w_tilde = T.exp(log_w_tilde)
    r = theano.gradient.disconnected_grad((log_w - log_Z - 1))

    generator_loss = -(r * log_g).mean()
    discriminator_loss = (T.nnet.softplus(-D_r)).mean() + (
        T.nnet.softplus(-D_f)).mean() + D_f.mean()

    return generator_loss, discriminator_loss, D_r, D_f, log_Z_est, log_w, w_tilde, {}
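A toy sketch (mine, with made-up shapes and a local RNG) of the sampling trick at the top of NVIL: uniform noise with a leading n_samples axis is compared against the probabilities padded by T.shape_padleft, which draws several independent binary samples per probability. It assumes `import theano` and `import theano.tensor as T`.

from theano.sandbox.rng_mrg import MRG_RandomStreams

demo_rng = MRG_RandomStreams(seed=42)
n_samples, batch, c, h, w = 2000, 1, 1, 2, 2
g_probs = T.tensor4('g_probs')       # (batch, c, h, w), values in [0, 1]
R_demo = demo_rng.uniform(size=(n_samples, batch, c, h, w),
                          dtype=theano.config.floatX)
bin_samples = (R_demo <= T.shape_padleft(g_probs)).astype(theano.config.floatX)

sample_mean = theano.function([g_probs], bin_samples.mean(axis=0))
# With many samples, the per-pixel mean should come out close to the input probabilities.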
Example #11
    def Step_decoding(self):
        ## if i_y is None, it means we are decoding the first word

        i_y = tensor.matrix('i_y', dtype='int64')
        i_y_mask = tensor.matrix('i_y_mask', dtype=config.floatX)
        h_ = tensor.matrix('h', dtype=config.floatX)
        c_ = tensor.matrix('c', dtype=config.floatX)
        flag = tensor.scalar('flag', dtype=config.floatX)
        state_below = tensor.alloc(numpy_floatX(0.), i_y_mask.shape[0],
                                   i_y_mask.shape[1], self.dim)
        ## shape=(1,n,d)
        shape = (i_y.shape[0], i_y.shape[1], self.dim)
        #i_y_repr = self.emb.emb_W[i_y.flatten()].reshape(shape)
        i_y_repr = self.emb.fprop(i_y)

        state_below = ifelse(tensor.gt(flag, 0.5), i_y_repr, state_below)
        #state_below = tensor.switch(tensor.gt(flag, 0.5), self.emb.fprop(i_y), state_below)
        proj_h, proj_c = self.fprop(state_below, i_y_mask, h_, c_)
        proj_h, proj_c = proj_h[0], proj_c[0]

        final_layer_h = _slice(proj_h, self.layers - 1, self.dim)
        proj_xx = tensor.dot(final_layer_h, self.U)
        proj_x = proj_xx + self.b
        assert proj_h.ndim == 2, 'ndim error'
        self.dbg = theano.function([i_y,i_y_mask,h_,c_,flag],\
                    [proj_h, self.U.shape, self.b.shape],on_unused_input='ignore')
        prob = softmax(proj_x)
        self.comp_next_probs_hc = theano.function([i_y,i_y_mask,h_,c_,flag], \
            [tensor.shape_padleft(prob), tensor.shape_padleft(proj_h), tensor.shape_padleft(proj_c), proj_x, ])
Example #12
 def set_output(self):
     self._output = tensor.sum(
         tensor.shape_padright(self._prev_layer.output) *
         tensor.shape_padleft(self.W.val),
         axis=-2)
     if self._bias:
         self._output += tensor.shape_padleft(self.b.val)
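A quick stand-alone check (mine, with hypothetical shapes, assuming `import numpy as np`, `import theano`, and `from theano import tensor`) of the broadcasting trick in set_output: right-padding the input and left-padding the weights turns the elementwise product into an outer product whose sum over axis -2 is an ordinary matrix multiply.

x = tensor.matrix('x')               # (batch, n_in)
W = tensor.matrix('W')               # (n_in, n_out)
y = tensor.sum(tensor.shape_padright(x) * tensor.shape_padleft(W), axis=-2)

check = theano.function([x, W], [y, tensor.dot(x, W)])
a = np.random.randn(4, 3).astype(theano.config.floatX)
b = np.random.randn(3, 5).astype(theano.config.floatX)
out, ref = check(a, b)
assert np.allclose(out, ref, atol=1e-5)   # same result as a plain dot product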
Example #13
def bn(x,
       gammas,
       betas,
       mean,
       var,
       args,
       axis=0,
       prefix="",
       normalize=True,
       use_popstats=False):
    assert x.ndim == 2
    if not normalize or not use_popstats:
        ### FIXME do we want to share statistics across space as well?
        mean = x.mean(axis=axis)
        var = x.var(axis=axis)
        mean.tag.bn_statistic = True
        mean.tag.bn_label = prefix + "_mean"
        var.tag.bn_statistic = True
        var.tag.bn_label = prefix + "_var"

    if not args.use_bn:
        y = x + betas
    else:
        var_corrected = var + 1e-7
        y = theano.tensor.nnet.bn.batch_normalization(
            inputs=x,
            gamma=gammas,
            beta=betas,
            mean=T.shape_padleft(mean),
            std=T.shape_padleft(T.sqrt(var_corrected)),
            mode="low_mem")
    return y, mean, var
Example #14
 def eval(r, s, coord='xy'):
     if coord == 'uv':
         return self.eval(r, s, coord) * other.eval(r, s, coord)
     else:  # xy
         m1 = self.eval(r, s, coord)
         m2 = other.eval(r, s, coord)
         # a try/except doesn't work here: fftconvolve is sometimes perfectly happy with a Theano object
         # note that for this to work, coord must be a uniform grid
         if all(isinstance(obj, (int, long, float, complex, np.ndarray))
                for obj in (m1, m2)):
             from scipy.signal import fftconvolve
             dv = (r[0, 1] - r[0, 0]) * (s[1, 0] - s[0, 0]
                                         )  # must be from e.g. meshgrid
             ret = fftconvolve(m1, m2, mode='same') * dv
             print("numpy path (convolve)")
             return ret
         else:
             print("theano path (convolve)")
             import theano.tensor as T
             dv = (r[0, 1] - r[0, 0]) * (s[1, 0] - s[0, 0]
                                         )  # must be from e.g. meshgrid
             # ret = fftconvolve(m1, m2, mode='same') * dv
             m1pad = T.shape_padleft(m1, 2)
             m2pad = T.shape_padleft(m2, 2)
             ret = T.nnet.conv2d(
                 m1pad, m2pad, border_mode='half',
                 filter_flip=False)[0, 0] / dv
             return ret
Example #15
def filter_and_prob(inpt, transition, emission,
           visible_noise_mean, visible_noise_cov,
           hidden_noise_mean, hidden_noise_cov,
           initial_hidden, initial_hidden_cov):
    step = forward_step(
        transition, emission,
        visible_noise_mean, visible_noise_cov,
        hidden_noise_mean, hidden_noise_cov)

    hidden_mean_0 = T.zeros_like(hidden_noise_mean).dimshuffle('x', 0)
    hidden_cov_0 = T.zeros_like(hidden_noise_cov).dimshuffle('x', 0, 1)
    f0, F0, ll0 = step(inpt[0], hidden_mean_0, hidden_cov_0)
    replace = {hidden_noise_mean: initial_hidden, 
               hidden_noise_cov: initial_hidden_cov}
    f0 = theano.clone(f0, replace)
    F0 = theano.clone(F0, replace)
    ll0 = theano.clone(ll0, replace)

    (f, F, ll), _ = theano.scan(
        step,
        sequences=inpt[1:],
        outputs_info=[f0, F0, None])

    ll = ll.sum(axis=0)

    f = T.concatenate([T.shape_padleft(f0), f])
    F = T.concatenate([T.shape_padleft(F0), F])
    ll += ll0

    return f, F, ll
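A toy illustration (mine, using simplified shapes and assuming `import numpy as np`, `import theano`, and `import theano.tensor as T`) of the shape_padleft + concatenate idiom used just above: scan returns a stack over t = 1..T-1, and padding the step-0 result with a length-1 leading axis lets it be prepended along the time axis.

f0 = T.vector('f0')          # filtered mean at t = 0, shape (H,)
f_rest = T.matrix('f_rest')  # scan output for t = 1..T-1, shape (T-1, H)
f_all = T.concatenate([T.shape_padleft(f0), f_rest], axis=0)   # (T, H)

shape_fn = theano.function([f0, f_rest], f_all.shape)
print(shape_fn(np.zeros(3, dtype=theano.config.floatX),
               np.zeros((4, 3), dtype=theano.config.floatX)))  # -> [5 3]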
Example #16
def BGAN(discriminator, g_output_logit, n_samples, trng, batch_size=64):
    d = OrderedDict()
    R = trng.uniform(size=(n_samples, batch_size, DIM_C, DIM_X, DIM_Y),
                     dtype=floatX_)
    g_output = T.nnet.sigmoid(g_output_logit)
    samples = (R <= T.shape_padleft(g_output)).astype(floatX_)

    # Create expression for passing real data through the discriminator
    D_r = lasagne.layers.get_output(discriminator)
    D_f = lasagne.layers.get_output(discriminator,
                                    samples.reshape((-1, DIM_C, DIM_X, DIM_Y)))
    D_f_ = D_f.reshape((n_samples, batch_size))

    log_d1 = -T.nnet.softplus(-D_f_)
    log_d0 = -(D_f_ + T.nnet.softplus(-D_f_))
    log_w = log_d1 - log_d0
    log_g = -(
        (1. - samples) * T.shape_padleft(g_output_logit) +
        T.shape_padleft(T.nnet.softplus(-g_output_logit))).sum(axis=(2, 3, 4))

    log_N = T.log(log_w.shape[0]).astype(floatX_)
    log_Z_est = log_sum_exp(log_w - log_N, axis=0)
    log_w_tilde = log_w - T.shape_padleft(log_Z_est) - log_N
    w_tilde = T.exp(log_w_tilde)
    w_tilde_ = theano.gradient.disconnected_grad(w_tilde)

    generator_loss = -(w_tilde_ * log_g).sum(0).mean()
    discriminator_loss = (T.nnet.softplus(-D_r)).mean() + (
        T.nnet.softplus(-D_f)).mean() + D_f.mean()

    return generator_loss, discriminator_loss, D_r, D_f, log_Z_est, log_w, w_tilde, d
Example #17
    def attention_gate(self, facts, memory, question):
        # TODO: for the first iteration question and memory are the same so
        # we can speedup the computation

        # facts is (num_batch * fact_length * memory_dim)
        # question is (num_batch * memory_dim)
        # memory is (num_batch * memory_dim)
        # attention_gates must be (fact_length * nb_batch * 1)

        # Compute z (num_batch * fact_length * (7*memory_dim + 2))

        # Dimshuffle facts to get a shape of
        # (fact_length * num_batch * memory_dim)
        facts = facts.dimshuffle(1, 0, 2)

        # Pad questions and memory to be of shape
        # (_ * num_batch * memory_dim)
        memory = T.shape_padleft(memory)
        question = T.shape_padleft(question)

        to_concatenate = list()
        to_concatenate.extend([facts, memory, question])
        to_concatenate.extend([facts * question, facts * memory])
        to_concatenate.extend([T.abs_(facts - question),
                               T.abs_(facts - memory)])

        # z = concatenate(to_concatenate, axis=2)

        # TODO: to be continued for the moment just return ones
        return T.ones((facts.shape[1], facts.shape[0], 1))
Example #18
def im_to_col(im, psize, n_channels=3):
    """Similar to MATLAB's im2col function.

  Args:
    im - a Theano tensor3, of the form <n_channels, height, width>.
    psize - an int specifying the (square) block size to use
    n_channels - the number of channels in im

  Returns: a 5-tensor of the form <patch_id_i, patch_id_j, n_channels, psize,
           psize>.
  """
    assert im.ndim == 3, "im must have dimension 3."
    im = im[:, ::-1, ::-1]
    res = T.zeros((n_channels, psize * psize, im.shape[1] - psize + 1,
                   im.shape[2] - psize + 1))
    filts = T.reshape(T.eye(psize * psize, psize * psize),
                      (psize * psize, psize, psize))
    filts = T.shape_padleft(filts).dimshuffle((1, 0, 2, 3))

    for i in range(n_channels):
        cur_slice = T.shape_padleft(im[i], n_ones=2)
        res = T.set_subtensor(res[i], T.nnet.conv.conv2d(cur_slice, filts)[0])

    return res.dimshuffle((0, 2, 3, 1)).reshape(
        (n_channels, im.shape[1] - psize + 1, im.shape[2] - psize + 1, psize,
         psize)).dimshuffle((1, 2, 0, 3, 4))
Example #19
        def density_given_previous_a_and_x(
            x,
            w,
            V_alpha,
            b_alpha,
            V_mu,
            b_mu,
            V_sigma,
            b_sigma,
            activation_factor,
            p_prev,
            a_prev,
            x_prev,
        ):
            a = a_prev + x_prev * w
            h = self.nonlinearity(a * activation_factor)  # BxH

            Alpha = T.nnet.softmax(
                T.dot(h, V_alpha) + T.shape_padleft(b_alpha))  # BxC
            Mu = T.dot(h, V_mu) + T.shape_padleft(b_mu)  # BxC
            Sigma = T.exp(
                (T.dot(h, V_sigma) + T.shape_padleft(b_sigma)))  # BxC
            p = p_prev + log_sum_exp(
                -constantX(0.5) * T.sqr((Mu - x) / Sigma) - T.log(Sigma) -
                constantX(0.5 * numpy.log(2 * numpy.pi)) + T.log(Alpha))
            return (p, a, x)
Example #20
    def __init__(self, n, p, *args, **kwargs):
        super(Multinomial, self).__init__(*args, **kwargs)

        p = p / tt.sum(p, axis=-1, keepdims=True)
        n = np.squeeze(n) # works also if n is a tensor

        if len(self.shape) > 1:
            m = self.shape[-2]
            try:
                assert n.shape == (m,)
            except (AttributeError, AssertionError):
                n = n * tt.ones(m)
            self.n = tt.shape_padright(n)
            self.p = p if p.ndim > 1 else tt.shape_padleft(p)
        elif n.ndim == 1:
            self.n = tt.shape_padright(n)
            self.p = p if p.ndim > 1 else tt.shape_padleft(p)
        else:
            # n is a scalar, p is a 1d array
            self.n = tt.as_tensor_variable(n)
            self.p = tt.as_tensor_variable(p)

        self.mean = self.n * self.p
        mode = tt.cast(tt.round(self.mean), 'int32')
        diff = self.n - tt.sum(mode, axis=-1, keepdims=True)
        inc_bool_arr = tt.abs_(diff) > 0
        mode = tt.inc_subtensor(mode[inc_bool_arr.nonzero()],
                                diff[inc_bool_arr.nonzero()])
        self.mode = mode
Example #21
    def __init__(self, n, p, *args, **kwargs):
        super(Multinomial, self).__init__(*args, **kwargs)

        p = p / tt.sum(p, axis=-1, keepdims=True)
        n = np.squeeze(n)  # works also if n is a tensor

        if len(self.shape) > 1:
            m = self.shape[-2]
            try:
                assert n.shape == (m, )
            except (AttributeError, AssertionError):
                n = n * tt.ones(m)
            self.n = tt.shape_padright(n)
            self.p = p if p.ndim > 1 else tt.shape_padleft(p)
        elif n.ndim == 1:
            self.n = tt.shape_padright(n)
            self.p = p if p.ndim > 1 else tt.shape_padleft(p)
        else:
            # n is a scalar, p is a 1d array
            self.n = tt.as_tensor_variable(n)
            self.p = tt.as_tensor_variable(p)

        self.mean = self.n * self.p
        mode = tt.cast(tt.round(self.mean), 'int32')
        diff = self.n - tt.sum(mode, axis=-1, keepdims=True)
        inc_bool_arr = tt.abs_(diff) > 0
        mode = tt.inc_subtensor(mode[inc_bool_arr.nonzero()],
                                diff[inc_bool_arr.nonzero()])
        self.mode = mode
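A small numeric sketch (mine) of the padding logic above: with a vector of totals n of shape (m,) and category probabilities p of shape (k,), tt.shape_padright(n) * tt.shape_padleft(p) broadcasts (m, 1) against (1, k) into the (m, k) matrix of expected counts used for self.mean. It assumes `import numpy as np`, `import theano`, and `import theano.tensor as tt`.

n = tt.vector('n')     # totals per row, shape (m,)
p = tt.vector('p')     # category probabilities, shape (k,)
mean = tt.shape_padright(n) * tt.shape_padleft(p)   # (m, 1) * (1, k) -> (m, k)

mean_fn = theano.function([n, p], mean)
print(mean_fn(np.array([10., 20.], dtype=theano.config.floatX),
              np.array([0.5, 0.3, 0.2], dtype=theano.config.floatX)))
# [[  5.   3.   2.]
#  [ 10.   6.   4.]]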
Example #22
def _span_sums(stt, end, p_lens, max_p_len, batch_size, dim, max_ans_len):
  # Sum of every start element and corresponding max_ans_len end elements.
  #
  # stt     (max_p_len, batch_size, dim)
  # end     (max_p_len, batch_size, dim)
  # p_lens  (batch_size,)
  max_ans_len_range = tt.shape_padleft(tt.arange(max_ans_len))          # (1, max_ans_len)
  offsets = tt.shape_padright(tt.arange(max_p_len))                     # (max_p_len, 1)
  end_idxs = max_ans_len_range + offsets                                # (max_p_len, max_ans_len)
  end_idxs_flat = end_idxs.flatten()                                    # (max_p_len*max_ans_len,)

  end_padded = tt.concatenate(                                          # (max_p_len+max_ans_len-1, batch_size, dim)
    [end, tt.zeros((max_ans_len-1, batch_size, dim))], axis=0)    
  end_structured = end_padded[end_idxs_flat]                            # (max_p_len*max_ans_len, batch_size, dim)
  end_structured = end_structured.reshape(                              # (max_p_len, max_ans_len, batch_size, dim)
    (max_p_len, max_ans_len, batch_size, dim))
  stt_shuffled = stt.dimshuffle((0,'x',1,2))                            # (max_p_len, 1, batch_size, dim)

  span_sums = stt_shuffled + end_structured                             # (max_p_len, max_ans_len, batch_size, dim)
  span_sums_reshaped = span_sums.dimshuffle((2,0,1,3)).reshape(         # (batch_size, max_p_len*max_ans_len, dim)
    (batch_size, max_p_len*max_ans_len, dim))

  p_lens_shuffled = tt.shape_padright(p_lens)                           # (batch_size, 1)
  end_idxs_flat_shuffled = tt.shape_padleft(end_idxs_flat)              # (1, max_p_len*max_ans_len)

  span_masks_reshaped = tt.lt(end_idxs_flat_shuffled, p_lens_shuffled)  # (batch_size, max_p_len*max_ans_len)
  span_masks_reshaped = cast_floatX(span_masks_reshaped)

  # (batch_size, max_p_len*max_ans_len, dim), (batch_size, max_p_len*max_ans_len)
  return span_sums_reshaped, span_masks_reshaped
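A tiny stand-alone check (mine, with max_ans_len=3 and max_p_len=4, assuming `import theano` and `import theano.tensor as tt`) of the index construction at the top of _span_sums: the left-padded arange broadcasts against the right-padded arange, producing an end index for every (start offset, answer length) pair.

max_ans_len_range = tt.shape_padleft(tt.arange(3))  # (1, 3): [[0, 1, 2]]
offsets = tt.shape_padright(tt.arange(4))           # (4, 1): [[0], [1], [2], [3]]
end_idxs = max_ans_len_range + offsets              # (4, 3)

print(theano.function([], end_idxs)())
# [[0 1 2]
#  [1 2 3]
#  [2 3 4]
#  [3 4 5]]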
Example #23
 def get_output_for(self, input, **kwargs):
     if self.tied_feamap:
         return input * T.gt(input, 0) + input * T.le(input, 0) \
              * T.shape_padleft(T.shape_padright(self.W[seg], n_ones = len(input_dim) - 2))
     else:
         return input * T.gt(input, 0) + input * T.le(input, 0) \
              * T.shape_padleft(self.W)
Example #24
def bn(x, gammas, betas, mean, var, args):
    assert mean.ndim == 1
    assert var.ndim == 1
    assert x.ndim == 2
    if not args.use_population_statistics:
        mean = x.mean(axis=0)
        var = x.var(axis=0)
    #var = T.maximum(var, args.epsilon)
    #var = var + args.epsilon

    if args.baseline:
        y = x + betas
    else:
        #var_corrected = var.zeros_like() + 1.0
        if args.clipvar:
            var_corrected = theano.tensor.switch(theano.tensor.eq(var, 0.), 1.0, var + args.epsilon)
        else:
            var_corrected = var + args.epsilon

        y = theano.tensor.nnet.bn.batch_normalization(
            inputs=x, gamma=gammas, beta=betas,
            mean=T.shape_padleft(mean), std=T.shape_padleft(T.sqrt(var_corrected)),
            mode="high_mem")
    assert mean.ndim == 1
    assert var.ndim == 1
    return y, mean, var
Example #25
    def sample(self,
               x0=None,
               h0s=None,
               n_samples=10,
               n_steps=10,
               condition_on=None,
               debug=False):
        '''Samples from an initial state.

        Args:
            x0 (Optional[T.tensor]): initial input state.
            h0s (Optional[list of T.tensor]): initial recurrent states.
            n_samples (Optional[int]): if no x0 or h0s, used to initialize the batch.
                Number of chains.
            n_steps (int): number of sampling steps.

        Returns:
            OrderedDict: dictionary of results. hiddens, probabilities, and preacts.
            theano.OrderedUpdates: updates.

        '''
        if x0 is None:
            x0, _ = self.output_net.sample(
                p=T.constant(0.5).astype(floatX),
                size=(n_samples, self.output_net.dim_out)).astype(floatX)

        if h0s is None:
            h0s = [
                T.alloc(0., x.shape[1], dim_h).astype(floatX)
                for dim_h in self.dim_hs
            ]

        seqs = []
        outputs_info = h0s + [x0, None, None]
        non_seqs = []
        non_seqs += self.get_sample_params()

        if n_steps == 1:
            inps = outputs_info[:-2] + non_seqs
            outs = self.step_sample(*inps)
            updates = theano.OrderedUpdates()
            hs = outs[:self.n_layers]
            x, p, z = outs[-3:]
            x = T.shape_padleft(x)
            p = T.shape_padleft(p)
            z = T.shape_padleft(z)
            hs = [T.shape_padleft(h) for h in hs]
        else:
            outs, updates = scan(self.step_sample,
                                 seqs,
                                 outputs_info,
                                 non_seqs,
                                 n_steps,
                                 name=self.name + '_sampling',
                                 strict=False)
            hs = outs[:self.n_layers]
            x, p, z = outs[-3:]

        return OrderedDict(x=x, p=p, z=z, hs=hs), updates
Example #26
def scalar_armijo_search(phi,
                         phi0,
                         derphi0,
                         c1=constant(1e-4),
                         n_iters=10,
                         profile=0):
    """
    .. todo::

        WRITEME
    """
    alpha0 = one
    phi_a0 = phi(alpha0)
    alpha1 = -(derphi0) * alpha0 ** 2 / 2.0 /\
            (phi_a0 - phi0 - derphi0 * alpha0)
    phi_a1 = phi(alpha1)

    csol1 = phi_a0 <= phi0 + c1 * derphi0
    csol2 = phi_a1 <= phi0 + c1 * alpha1 * derphi0

    def armijo(alpha0, alpha1, phi_a0, phi_a1):
        factor = alpha0**2 * alpha1**2 * (alpha1 - alpha0)
        a = alpha0 ** 2 * (phi_a1 - phi0 - derphi0 * alpha1) - \
            alpha1 ** 2 * (phi_a0 - phi0 - derphi0 * alpha0)
        a = a / factor
        b = -alpha0 ** 3 * (phi_a1 - phi0 - derphi0 * alpha1) + \
            alpha1 ** 3 * (phi_a0 - phi0 - derphi0 * alpha0)
        b = b / factor

        alpha2 = (-b + TT.sqrt(abs(b**2 - 3 * a * derphi0))) / (3.0 * a)
        phi_a2 = phi(alpha2)

        end_condition = phi_a2 <= phi0 + c1 * alpha2 * derphi0
        end_condition = TT.bitwise_or(TT.isnan(alpha2), end_condition)
        end_condition = TT.bitwise_or(TT.isinf(alpha2), end_condition)
        alpha2 = TT.switch(
            TT.bitwise_or(alpha1 - alpha2 > alpha1 / constant(2.),
                          one - alpha2 / alpha1 < 0.96), alpha1 / constant(2.),
            alpha2)
        return [alpha1, alpha2, phi_a1, phi_a2], \
                theano.scan_module.until(end_condition)

    states = []
    states += [TT.unbroadcast(TT.shape_padleft(alpha0), 0)]
    states += [TT.unbroadcast(TT.shape_padleft(alpha1), 0)]
    states += [TT.unbroadcast(TT.shape_padleft(phi_a0), 0)]
    states += [TT.unbroadcast(TT.shape_padleft(phi_a1), 0)]
    # print 'armijo'
    rvals, _ = scan(armijo,
                    states=states,
                    n_steps=n_iters,
                    name='armijo',
                    mode=theano.Mode(linker='cvm'),
                    profile=profile)

    sol_scan = rvals[1][0]
    a_opt = ifelse(csol1, one, ifelse(csol2, alpha1, sol_scan))
    score = ifelse(csol1, phi_a0, ifelse(csol2, phi_a1, rvals[2][0]))
    return a_opt, score
Example #27
    def get_output(self, train=False):
        X = self.get_input(train=train)
        c0 = self.c0[None,:] * T.ones((X.shape[0], self.context_dim))
        cn = self.cn[None,:] * T.ones((X.shape[0], self.context_dim))
        X = T.concatenate(
            [
                T.shape_padleft(self.e0,2) * T.ones((X.shape[0], 1, X.shape[2])),
                X,
                T.shape_padleft(self.en,2) * T.ones((X.shape[0], 1, X.shape[2])),
            ],
            axis = 1
        )
        X = X.dimshuffle(1,0,2) # put the timestep axis first
        # cast the int32 mask to float32, otherwise mask_t[:, None] * cl_t inside scan gets upcast to float64
        mask = T.cast(self.get_output_mask(train=train), T.config.floatX)
        mask = mask.dimshuffle(1,0) # put the timestep axis first
        #theano.printing.debugprint([mask], print_type=True)
        def _forward_step(e_t, e_tm1, mask_t, cl_tm1):
            #print 'e_t:', e_t.type.ndim
            #print 'cl_t:', cl_tm1.type.ndim
            cl_t = T.nnet.sigmoid(
                T.dot(cl_tm1, self.Wl) + T.dot(e_tm1, self.Wsl)
            )
            cl_t = mask_t[:, None] * cl_t + (1. - mask_t[:, None]) * cl_tm1 # if this position is masked, just carry over the previous state
            #theano.printing.debugprint([mask_t], print_type=True)
            #theano.printing.debugprint([cl_t], print_type=True)
            return cl_t
        def _backward_step(e_t, e_tp1, mask_t, cr_tp1):
            cr_t = T.nnet.sigmoid(
            T.dot(cr_tp1, self.Wr) + T.dot(e_tp1, self.Wsr))
            cr_t = mask_t[:, None] * cr_t + (1. - mask_t[:, None]) * cr_tp1 # if this position is masked, just carry over the previous state
            return cr_t
        Cl, _ = theano.scan(_forward_step,
                        sequences=[dict(input=X, taps=[0, -1]), mask],
                        outputs_info=[
                            dict(initial=c0, taps=[-1]) # note: this is not c0!!!
                        ],

        )
        Cr, _ = theano.scan(_backward_step,
                            sequences=[dict(input=X, taps=[0, -1]), mask],
                            outputs_info=[
                                dict(initial=cn, taps=[-1])
                            ],
                            go_backwards=True,
        )
        Cr = Cr[::-1] # reverse Cr
        def _concatenate_activation_step(e_t, mask_t, cl_t, cr_t):
            #print theano.printing.debugprint(cr_t, print_type=True)
            h_t = T.tanh( T.dot(T.concatenate([e_t, cl_t, cr_t], axis=1), self.W2)
                       + self.b2)
            h_t = mask_t[:, None] * h_t + (1. - mask_t[:, None]) * (-10000000000.) # set masked positions to a very negative value
            return h_t

        Y, _ = theano.scan(_concatenate_activation_step,
                    sequences=[X, mask, Cl, Cr],
                    outputs_info=None,
        )
        return Y.dimshuffle(1,0,2) # put the sample (batch) axis back first
Example #28
def transform_targets(targets):
    """Transform targets into a format suitable for passing to cost()."""

    reshaped = T.shape_padleft(targets)
    blanks = T.fill(reshaped, _BLANK)
    result = T.concatenate([blanks, reshaped]).dimshuffle(1, 0, 2).reshape((2*targets.shape[0], targets.shape[1]))
    result = T.concatenate([result, T.shape_padleft(result[0])])
    return result
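A quick trace (mine) of what the interleaving above produces, assuming a module-level _BLANK = 0, integer targets of shape (seq_len, batch), and the usual `import numpy as np`, `import theano`, `import theano.tensor as T`; each label ends up surrounded by blanks, the layout CTC-style costs expect.

targets = T.imatrix('targets')                 # (seq_len, batch)
to_ctc = theano.function([targets], transform_targets(targets))

print(to_ctc(np.array([[1], [2], [3]], dtype='int32')).ravel())
# [0 1 0 2 0 3 0]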
Example #29
def scalar_armijo_search(phi, phi0, derphi0, c1=constant(1e-4),
                         n_iters=10, profile=0):
    alpha0 = one
    phi_a0 = phi(alpha0)
    alpha1 = -(derphi0) * alpha0 ** 2 / 2.0 /\
            (phi_a0 - phi0 - derphi0 * alpha0)
    phi_a1 = phi(alpha1)

    csol1 = phi_a0 <= phi0 + c1 * derphi0
    csol2 = phi_a1 <= phi0 + c1 * alpha1 * derphi0

    def armijo(alpha0, alpha1, phi_a0, phi_a1):
        factor = alpha0 ** 2 * alpha1 ** 2 * (alpha1 - alpha0)
        a = alpha0 ** 2 * (phi_a1 - phi0 - derphi0 * alpha1) - \
            alpha1 ** 2 * (phi_a0 - phi0 - derphi0 * alpha0)
        a = a / factor
        b = -alpha0 ** 3 * (phi_a1 - phi0 - derphi0 * alpha1) + \
            alpha1 ** 3 * (phi_a0 - phi0 - derphi0 * alpha0)
        b = b / factor

        alpha2 = (-b + TT.sqrt(abs(b ** 2 - 3 * a * derphi0))) / (3.0 * a)
        phi_a2 = phi(alpha2)

        end_condition = phi_a2 <= phi0 + c1 * alpha2 * derphi0
        end_condition = TT.bitwise_or(
            TT.isnan(alpha2), end_condition)
        end_condition = TT.bitwise_or(
            TT.isinf(alpha2), end_condition)
        alpha2 = TT.switch(
            TT.bitwise_or(alpha1 - alpha2 > alpha1 / constant(2.),
                  one - alpha2 / alpha1 < 0.96),
            alpha1 / constant(2.),
            alpha2)
        return [alpha1, alpha2, phi_a1, phi_a2], \
                theano.scan_module.until(end_condition)

    states = []
    states += [TT.unbroadcast(TT.shape_padleft(alpha0), 0)]
    states += [TT.unbroadcast(TT.shape_padleft(alpha1), 0)]
    states += [TT.unbroadcast(TT.shape_padleft(phi_a0), 0)]
    states += [TT.unbroadcast(TT.shape_padleft(phi_a1), 0)]
    # print 'armijo'
    rvals, _ = scan(
                armijo,
                states=states,
                n_steps=n_iters,
                name='armijo',
                mode=theano.Mode(linker='cvm'),
                profile=profile)

    sol_scan = rvals[1][0]
    a_opt = ifelse(csol1, one,
                ifelse(csol2, alpha1,
                    sol_scan))
    score = ifelse(csol1, phi_a0,
                   ifelse(csol2, phi_a1,
                          rvals[2][0]))
    return a_opt, score
Example #30
def transform_targets(targets):
    """Transform targets into a format suitable for passing to cost()."""

    reshaped = T.shape_padleft(targets)
    blanks = T.fill(reshaped, _BLANK)
    result = T.concatenate([blanks, reshaped]).dimshuffle(1, 0, 2).reshape(
        (2 * targets.shape[0], targets.shape[1]))
    result = T.concatenate([result, T.shape_padleft(result[0])])
    return result
Example #31
        def density_given_previous_a_and_x(x, w, V_alpha, b_alpha, V_mu, b_mu, V_sigma, b_sigma,activation_factor, p_prev, a_prev, x_prev,):
            a = a_prev + x_prev * w
            h = self.nonlinearity(a * activation_factor)  # BxH

            Alpha = T.nnet.softmax(T.dot(h, V_alpha) + T.shape_padleft(b_alpha))  # BxC
            Mu = T.dot(h, V_mu) + T.shape_padleft(b_mu)  # BxC
            Sigma = T.exp((T.dot(h, V_sigma) + T.shape_padleft(b_sigma)))  # BxC
            p = p_prev + log_sum_exp(-constantX(0.5) * T.sqr((Mu - x) / Sigma) - T.log(Sigma) - constantX(0.5 * numpy.log(2 * numpy.pi)) + T.log(Alpha))
            return (p, a, x)
Example #32
        def density_given_previous_a_and_x(x, w, V_alpha, b_alpha, V_mu, b_mu, V_sigma, b_sigma, activations_factor, p_prev, a_prev, x_prev):
            a = a_prev + T.dot(T.shape_padright(x_prev, 1), T.shape_padleft(w, 1))
            h = self.nonlinearity(a * activations_factor)  # BxH

            Alpha = T.nnet.softmax(T.dot(h, V_alpha) + T.shape_padleft(b_alpha))  # BxC
            Mu = T.dot(h, V_mu) + T.shape_padleft(b_mu)  # BxC
            Sigma = T.exp((T.dot(h, V_sigma) + T.shape_padleft(b_sigma)))  # BxC
            p = p_prev + log_sum_exp(T.log(Alpha) - T.log(2 * Sigma) - T.abs_(Mu - T.shape_padright(x, 1)) / Sigma)
            return (p, a, x)
Example #33
        def density_and_gradients(x_i, x_im1, w_i, V_alpha, b_alpha, V_mu,
                                  b_mu, V_sigma, b_sigma, activation_factor,
                                  a_i, lp_accum, dP_da_ip1):
            B = T.cast(x_i.shape[0], theano.config.floatX)
            pot = a_i * activation_factor
            h = self.nonlinearity(pot)  # BxH

            z_alpha = T.dot(h, V_alpha) + T.shape_padleft(b_alpha)
            z_mu = T.dot(h, V_mu) + T.shape_padleft(b_mu)
            z_sigma = T.dot(h, V_sigma) + T.shape_padleft(b_sigma)

            Alpha = T.nnet.softmax(z_alpha)  # BxC
            Mu = z_mu  # BxC
            Sigma = T.exp(z_sigma)  # BxC

            Phi = -T.log(
                2 * Sigma) - T.abs_(Mu - T.shape_padright(x_i, 1)) / Sigma
            wPhi = T.maximum(Phi + T.log(Alpha), constantX(-100.0))

            lp_current = log_sum_exp(wPhi)
            # lp_current_sum = T.sum(lp_current)

            Pi = T.exp(wPhi - T.shape_padright(lp_current, 1))  # #
            dp_dz_alpha = Pi - Alpha  # BxC
            # dp_dz_alpha = T.grad(lp_current_sum, z_alpha)
            gb_alpha = dp_dz_alpha.mean(0, dtype=theano.config.floatX)  # C
            gV_alpha = T.dot(h.T, dp_dz_alpha) / B  # HxC

            # dp_dz_mu = T.grad(lp_current_sum, z_mu)
            dp_dz_mu = Pi * T.sgn(T.shape_padright(x_i, 1) - Mu) / Sigma
            # dp_dz_mu = dp_dz_mu * Sigma
            gb_mu = dp_dz_mu.mean(0, dtype=theano.config.floatX)
            gV_mu = T.dot(h.T, dp_dz_mu) / B

            # dp_dz_sigma = T.grad(lp_current_sum, z_sigma)
            dp_dz_sigma = Pi * (T.abs_(T.shape_padright(x_i, 1) - Mu) / Sigma -
                                1)
            gb_sigma = dp_dz_sigma.mean(0, dtype=theano.config.floatX)
            gV_sigma = T.dot(h.T, dp_dz_sigma) / B

            dp_dh = T.dot(dp_dz_alpha, V_alpha.T) + T.dot(
                dp_dz_mu, V_mu.T) + T.dot(dp_dz_sigma, V_sigma.T)  # BxH
            if non_linearity_name == "sigmoid":
                dp_dpot = dp_dh * h * (1 - h)
            elif non_linearity_name == "RLU":
                dp_dpot = dp_dh * (pot > 0)

            gfact = (dp_dpot * a_i).sum(1).mean(
                0, dtype=theano.config.floatX)  # 1

            dP_da_i = dP_da_ip1 + dp_dpot * activation_factor  # BxH
            gW = T.dot(T.shape_padleft(x_im1, 1), dP_da_i).flatten() / B

            return (a_i -
                    T.dot(T.shape_padright(x_im1, 1), T.shape_padleft(w_i, 1)),
                    lp_accum + lp_current, dP_da_i, gW, gb_alpha, gV_alpha,
                    gb_mu, gV_mu, gb_sigma, gV_sigma, gfact)
Example #34
            def map_fn(image, image_shape, a, b, location, scale):
                # apply_inner expects a batch axis
                image = T.shape_padleft(image)
                location = T.shape_padleft(location)
                scale = T.shape_padleft(scale)

                patch = self.apply_inner(image, location, scale, a, b)

                # return without batch axis
                return patch[0]
Example #35
                def map_fn(image, a, b, location, scale):
                    # apply_inner expects a batch axis
                    image = T.shape_padleft(image)
                    location = T.shape_padleft(location)
                    scale = T.shape_padleft(scale)

                    patch = self.apply_inner(image, location, scale, a, b)

                    # return without batch axis
                    return patch[0]
Example #36
def viterbi(trans_probs):
    """
    :param trans_probs: N * max(L) * D * D tensor
    """

    N = trans_probs.shape[0]
    D = trans_probs.shape[-1]

    T1_0 = trans_probs[:, 0, 0]  # N * D matrix

    T2_0 = T.zeros((N, D))  # N * D matrix

    def step_forward(trans_probs_l, T1_lm1):

        T1_l = T.max(T.shape_padright(T1_lm1) * trans_probs_l,
                     axis=1)  # N * D matrix

        T2_l = T.argmax(T.shape_padright(T1_lm1) * trans_probs_l,
                        axis=1)  # N * D matrix

        return T.cast(T1_l, 'float32'), T.cast(T2_l, 'float32')

    ([T1, T2], _) = theano.scan(
        step_forward,
        sequences=trans_probs[:, 1:].dimshuffle((1, 0, 2, 3)),
        outputs_info=[T1_0, None],
    )
    # (max(L)-1) * N * D tensors

    T1 = T.concatenate([T.shape_padleft(T1_0), T1], axis=0)  # max(L) * N * D
    T2 = T.concatenate([T.shape_padleft(T2_0), T2], axis=0)  # max(L) * N * D

    char_L = T.cast(T.argmax(T1[-1], axis=1), 'float32')  # N

    def step_backward(T2_lp1, char_lp1):

        char_l = T2_lp1[T.arange(N), T.cast(char_lp1, 'int32')]  # N

        return T.cast(char_l, 'float32')

    chars, _ = theano.scan(
        step_backward,
        sequences=T2[1:][::-1],
        outputs_info=[char_L],
    )
    # (max(L)-1) * N

    chars = chars[::-1]  # (max(L)-1) * N

    chars = T.concatenate([chars, T.shape_padleft(char_L)],
                          axis=0).T  # N * max(L)

    probs = get_probs(chars, trans_probs)  # N * max(L)

    return chars, probs  # N * max(L) and N * max(L)
Example #37
def BGAN(discriminator, g_output_logit, n_samples, trng, batch_size=64):
    d = OrderedDict()
    d['g_output_logit'] = g_output_logit
    g_output_logit_ = g_output_logit.reshape((-1, N_WORDS))
    d['g_output_logit_'] = g_output_logit_
    
    g_output = T.nnet.softmax(g_output_logit_)
    g_output = g_output.reshape((batch_size, L_GEN, N_WORDS))
    d['g_output'] = g_output
    
    p_t = T.tile(T.shape_padleft(g_output), (n_samples, 1, 1, 1))
    d['p_t'] = p_t
    p = p_t.reshape((-1, N_WORDS))
    d['p'] = p
    
    samples = trng.multinomial(pvals=p).astype(floatX)
    samples = theano.gradient.disconnected_grad(samples)
    samples = samples.reshape((n_samples, batch_size, L_GEN, N_WORDS))
    d['samples'] = samples
    
    D_r = lasagne.layers.get_output(discriminator)
    D_f = lasagne.layers.get_output(
        discriminator,
        samples.reshape((-1, L_GEN, N_WORDS)))
    D_f_ = D_f.reshape((n_samples, -1, L_GEN))
    d.update(D_r=D_r, D_f=D_f, D_f_=D_f_)
    
    log_d1 = -T.nnet.softplus(-D_f_)
    log_d0 = -(D_f_ + T.nnet.softplus(-D_f_))
    log_w = D_f_
    d.update(log_d1=log_d1, log_d0=log_d0, log_w=log_w)

    log_N = T.log(log_w.shape[0]).astype(log_w.dtype)
    log_Z_est = log_sum_exp(log_w - log_N, axis=0)
    log_Z_est = theano.gradient.disconnected_grad(log_Z_est)
    d['log_Z_est'] = log_Z_est

    log_g = (samples * (g_output_logit - log_sum_exp2(
        g_output_logit, axis=2))[None, :, :, :]).sum(axis=3)
    d['log_g'] = log_g
    
    log_N = T.log(log_w.shape[0]).astype(floatX)
    log_Z_est = log_sum_exp(log_w - log_N, axis=0)
    log_w_tilde = log_w - T.shape_padleft(log_Z_est) - log_N
    w_tilde = T.exp(log_w_tilde)
    w_tilde_ = theano.gradient.disconnected_grad(w_tilde)
    d.update(log_w_tilde=log_w_tilde, w_tilde=w_tilde)
    
    generator_loss = -(w_tilde_ * log_g).sum(0).mean()
    discriminator_loss = (T.nnet.softplus(-D_r)).mean() + (
        T.nnet.softplus(-D_f)).mean() + D_f.mean()
    d.update(generator_loss=generator_loss,
             discriminator_loss=discriminator_loss)
    
    return generator_loss, discriminator_loss, D_r, D_f, log_Z_est, d
Example #38
    def get_output_for(self, input, **kwargs):
        output = T.zeros_like(input)
        for seg in range(0, self.num_segs):
            if self.tied_feamap:
                output += self.basisf(input, self.x_start + self.x_step * seg, self.x_start + self.x_step * (seg + 1)) \
                        * T.shape_padleft(T.shape_padright(self.W[seg], n_ones = len(input_dim) - 2))
            else:
                output += self.basisf(input, self.x_start + self.x_step * seg, self.x_start + self.x_step * (seg + 1)) \
                        * T.shape_padleft(self.W[seg])

        return output
Example #39
def multinomial_BGAN(discriminator, g_output_logit, n_samples=None, log_Z=None,
                     batch_size=None, dim_c=None, dim_x=None, dim_y=None):
    
    g_output_logit_ = g_output_logit.transpose(0, 2, 3, 1)
    g_output_logit_ = g_output_logit_.reshape((-1, dim_c))
    
    g_output = T.nnet.softmax(g_output_logit_)
    g_output = g_output.reshape((batch_size, dim_x, dim_y, dim_c))
    
    p_t = T.tile(T.shape_padleft(g_output), (n_samples, 1, 1, 1, 1))
    p = p_t.reshape((-1, dim_c))
    
    samples = trng.multinomial(pvals=p).astype(floatX)
    samples = theano.gradient.disconnected_grad(samples)
    samples = samples.reshape((n_samples, batch_size, dim_x, dim_y, dim_c))
    samples = samples.transpose(0, 1, 4, 2, 3)
    
    real_out = lasagne.layers.get_output(discriminator)
    fake_out = lasagne.layers.get_output(
        discriminator, samples.reshape((-1, dim_c, dim_x, dim_y)))
    log_w = fake_out.reshape((n_samples, -1))

    log_g = ((samples * (g_output_logit - log_sum_exp(
        g_output_logit, axis=1, keepdims=True))[None, :, :, :, :])
        .sum(axis=(2, 3, 4)))
    
    log_N = T.log(log_w.shape[0]).astype(floatX)
    log_Z_est = log_sum_exp(log_w - log_N, axis=0)
    log_w_tilde = log_w - T.shape_padleft(log_Z_est) - log_N
    w_tilde = T.exp(log_w_tilde)
    w_tilde_ = theano.gradient.disconnected_grad(w_tilde)
    
    generator_loss = -(w_tilde_ * log_g).sum(0).mean()
    discriminator_loss = (T.nnet.softplus(-real_out)).mean() + (
        T.nnet.softplus(-fake_out)).mean() + fake_out.mean()

    g_results = {
        'g loss': generator_loss,
        'p(fake==0)': 1. - T.nnet.sigmoid(fake_out).mean(),
        'E[logit(fake)]': fake_out.mean(),
        'log w': log_w.mean(),
        'log w var': log_w.std() ** 2,
        'norm w': w_tilde.mean(),
        'norm w var': w_tilde.std() ** 2,
        'ESS': (1. / (w_tilde ** 2).sum(0)).mean(),
    }
    
    d_results = {
        'd loss': discriminator_loss,
        'p(real==1)': T.nnet.sigmoid(real_out).mean(),
        'E[logit(real)]': real_out.mean(),
    }
    
    return g_results, d_results, log_Z_est
Example #40
    def get_output_for(self, input, **kwargs):
        output = input * T.gt(input, 0)
        for seg in range(0, self.num_segs):
            if self.tied_feamap:
                output += self.basisf(input, T.shape_padleft(T.shape_padright(self.P[seg], n_ones = len(input_dim) - 2))) \
                        * T.shape_padleft(T.shape_padright(self.W[seg], n_ones = len(input_dim) - 2))
            else:
                output += self.basisf(input, T.shape_padleft(self.P[seg])) \
                        * T.shape_padleft(self.W[seg])

        return output
Example #41
def conv1d(vector, kernel, flip=False):
    """
    One-dimensional linear convolution of a vector with a 1d kernel.

    """
    return tt.nnet.conv2d(
        tt.shape_padleft(vector, 3),
        tt.shape_padleft(kernel, 3),
        input_shape=(1, 1, 1, -1),
        filter_shape=(1, 1, 1, -1),
        filter_flip=flip,
    )[0, 0, 0, :]
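A short usage sketch (mine) of conv1d above, assuming `import numpy as np`, `import theano`, and `import theano.tensor as tt`; with filter_flip=False (the default here) the result is a 'valid'-mode cross-correlation, which numpy can reproduce with np.correlate.

v = tt.vector('v')
k = tt.vector('k')
conv_fn = theano.function([v, k], conv1d(v, k))

a = np.array([1., 2., 3., 4.], dtype=theano.config.floatX)
b = np.array([1., 0., -1.], dtype=theano.config.floatX)
print(conv_fn(a, b))                        # expected: [-2. -2.]
print(np.correlate(a, b, mode='valid'))     # same values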
Example #42
    def __init__(self, rng, input, filter_shape, image_shape, zero_pad=True, poolsize=(2, 2), read_file=False, W_input=None, b_input=None):
        
        assert image_shape[1] == filter_shape[1]

        fan_in = numpy.prod(filter_shape[1:])
        fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) /
                   numpy.prod(poolsize))
        W_bound = numpy.sqrt(6. / (fan_in + fan_out))
        self.W = theano.shared(
            numpy.asarray(
                rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
                dtype=theano.config.floatX
            ),
            borrow=True
        )

        b_values = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX)
        self.b = theano.shared(value=b_values, borrow=True)

        if read_file:
            self.W = W_input
            self.b = b_input

        if zero_pad:
            input=input.transpose(2, 0, 1, 3)
            input=T.concatenate([T.shape_padleft(T.zeros_like(input[0]), 1), input, T.shape_padleft(T.zeros_like(input[0]), 1)], axis=0)
            input=input.transpose(1, 2, 0, 3)
            input=input.transpose(3, 0, 1, 2)
            input=T.concatenate([T.shape_padleft(T.zeros_like(input[0]), 1), input, T.shape_padleft(T.zeros_like(input[0]), 1)], axis=0)
            input=input.transpose(1, 2, 3, 0)
        self.input = input
        image_shape = (image_shape[0], image_shape[1], image_shape[2]+2, image_shape[3]+2)

        conv_out = conv.conv2d(
            input=self.input,
            filters=self.W,
            filter_shape=filter_shape,
            image_shape=image_shape,
            border_mode='valid'
        )

        pooled_out = downsample.max_pool_2d(
            input=conv_out,
            ds=poolsize,
            ignore_border=True
        )

        self.switch = T.abs_(1 - T.sgn(T.abs_(conv_out - pooled_out.repeat(2, axis=2).repeat(2, axis=3))))

        self.output = T.tanh(pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))

        self.params = [self.W, self.b]
Example #43
def binary_BGAN(discriminator, g_output_logit, n_samples=None, log_Z=None,
                batch_size=None, dim_c=None, dim_x=None, dim_y=None):
    '''Discrete BGAN for discrete binary variables.
    
    '''
    # Sample from a uniform distribution and generate samples over input.
    R = trng.uniform(size=(n_samples, batch_size, dim_c, dim_x, dim_y),
                     dtype=floatX)
    g_output = T.nnet.sigmoid(g_output_logit)
    samples = (R <= T.shape_padleft(g_output)).astype(floatX)

    # Feed samples through the discriminator.
    real_out = lasagne.layers.get_output(discriminator)
    fake_out = lasagne.layers.get_output(
        discriminator, samples.reshape((-1, dim_c, dim_x, dim_y)))
    log_w = fake_out.reshape((n_samples, batch_size))
    
    # Get the log probabilities of the samples.
    log_g = -((1. - samples) * T.shape_padleft(g_output_logit)
             + T.shape_padleft(T.nnet.softplus(-g_output_logit))).sum(
        axis=(2, 3, 4))
    
    # Get the normalized weights.
    log_N = T.log(log_w.shape[0]).astype(floatX)
    log_Z_est = log_sum_exp(log_w - log_N, axis=0)
    log_w_tilde = log_w - T.shape_padleft(log_Z_est) - log_N
    w_tilde = T.exp(log_w_tilde)
    w_tilde_ = theano.gradient.disconnected_grad(w_tilde)

    # Losses.
    generator_loss = -(w_tilde_ * log_g).sum(0).mean()
    discriminator_loss = (T.nnet.softplus(-real_out)).mean() + (
        T.nnet.softplus(-fake_out)).mean() + fake_out.mean()
    
    g_results = {
        'g loss': generator_loss,
        'p(fake==0)': 1. - T.nnet.sigmoid(fake_out).mean(),
        'E[logit(fake)]': fake_out.mean(),
        'log w': log_w.mean(),
        'log w var': log_w.std() ** 2,
        'norm w': w_tilde.mean(),
        'norm w var': w_tilde.std() ** 2,
        'ESS': (1. / (w_tilde ** 2).sum(0)).mean(),
    }
    
    d_results = {
        'd loss': discriminator_loss,
        'p(real==1)': T.nnet.sigmoid(real_out).mean(),
        'E[logit(real)]': real_out.mean(),
    }
    
    return g_results, d_results, log_Z_est
Example #44
        def density_and_gradients(x_i, x_im1, w_i, V_alpha, b_alpha, V_mu, b_mu, V_sigma, b_sigma, activation_factor, a_i, lp_accum, dP_da_ip1):
            B = T.cast(x_i.shape[0], theano.config.floatX)
            pot = a_i * activation_factor
            h = self.nonlinearity(pot)  # BxH

            z_alpha = T.dot(h, V_alpha) + T.shape_padleft(b_alpha)
            z_mu = T.dot(h, V_mu) + T.shape_padleft(b_mu)
            z_sigma = T.dot(h, V_sigma) + T.shape_padleft(b_sigma)

            Alpha = T.nnet.softmax(z_alpha)  # BxC
            Mu = z_mu  # BxC
            Sigma = T.exp(z_sigma)  # BxC

            Phi = -T.log(2 * Sigma) - T.abs_(Mu - T.shape_padright(x_i, 1)) / Sigma
            wPhi = T.maximum(Phi + T.log(Alpha), constantX(-100.0))

            lp_current = log_sum_exp(wPhi)
            # lp_current_sum = T.sum(lp_current)

            Pi = T.exp(wPhi - T.shape_padright(lp_current, 1))  # #
            dp_dz_alpha = Pi - Alpha  # BxC
            # dp_dz_alpha = T.grad(lp_current_sum, z_alpha)
            gb_alpha = dp_dz_alpha.mean(0, dtype=theano.config.floatX)  # C
            gV_alpha = T.dot(h.T, dp_dz_alpha) / B  # HxC

            # dp_dz_mu = T.grad(lp_current_sum, z_mu)
            dp_dz_mu = Pi * T.sgn(T.shape_padright(x_i, 1) - Mu) / Sigma
            # dp_dz_mu = dp_dz_mu * Sigma
            gb_mu = dp_dz_mu.mean(0, dtype=theano.config.floatX)
            gV_mu = T.dot(h.T, dp_dz_mu) / B

            # dp_dz_sigma = T.grad(lp_current_sum, z_sigma)
            dp_dz_sigma = Pi * (T.abs_(T.shape_padright(x_i, 1) - Mu) / Sigma - 1)
            gb_sigma = dp_dz_sigma.mean(0, dtype=theano.config.floatX)
            gV_sigma = T.dot(h.T, dp_dz_sigma) / B

            dp_dh = T.dot(dp_dz_alpha, V_alpha.T) + T.dot(dp_dz_mu, V_mu.T) + T.dot(dp_dz_sigma, V_sigma.T)  # BxH
            if non_linearity_name == "sigmoid":
                dp_dpot = dp_dh * h * (1 - h)
            elif non_linearity_name == "RLU":
                dp_dpot = dp_dh * (pot > 0)

            gfact = (dp_dpot * a_i).sum(1).mean(0, dtype=theano.config.floatX)  # 1

            dP_da_i = dP_da_ip1 + dp_dpot * activation_factor  # BxH
            gW = T.dot(T.shape_padleft(x_im1, 1), dP_da_i).flatten() / B

            return (a_i - T.dot(T.shape_padright(x_im1, 1), T.shape_padleft(w_i, 1)),
                    lp_accum + lp_current,
                    dP_da_i,
                    gW, gb_alpha, gV_alpha, gb_mu, gV_mu, gb_sigma, gV_sigma, gfact)
Example #45
        def density_and_gradients(x_i, x_im1, w_i, V_alpha, b_alpha, V_mu, b_mu, V_sigma, b_sigma, activation_factor, a_i, lp_accum, dP_da_ip1):
            B = T.cast(x_i.shape[0], floatX)
            pot = a_i * activation_factor
            h = self.nonlinearity(pot)  # BxH

            z_alpha = T.dot(h, V_alpha) + T.shape_padleft(b_alpha)
            z_mu = T.dot(h, V_mu) + T.shape_padleft(b_mu)
            z_sigma = T.dot(h, V_sigma) + T.shape_padleft(b_sigma)

            Alpha = T.nnet.softmax(z_alpha)  # BxC
            Mu = z_mu  # BxC
            Sigma = T.exp(z_sigma)  # BxC

            Phi = -constantX(0.5) * T.sqr((Mu - T.shape_padright(x_i, 1)) / Sigma) - T.log(Sigma) - constantX(0.5 * numpy.log(2 * numpy.pi))
            wPhi = T.maximum(Phi + T.log(Alpha), constantX(-100.0))

            lp_current = -log_sum_exp(wPhi)  # negative log-likelihood of x_i under the mixture
            # lp_current_sum = T.sum(lp_current)

            # Responsibilities exp(wPhi - logsumexp(wPhi)); lp_current is the *negative*
            # log-sum-exp here, so it is added back rather than subtracted.
            Pi = T.exp(wPhi + T.shape_padright(lp_current, 1))  # BxC
            dp_dz_alpha = Pi - Alpha  # BxC
            # dp_dz_alpha = T.grad(lp_current_sum, z_alpha)
            gb_alpha = dp_dz_alpha.mean(0, dtype=floatX)  # C
            gV_alpha = T.dot(h.T, dp_dz_alpha) / B  # HxC

            dp_dz_mu = -Pi * (Mu - T.shape_padright(x_i, 1)) / T.sqr(Sigma)
            # dp_dz_mu = T.grad(lp_current_sum, z_mu)
            dp_dz_mu = dp_dz_mu * Sigma  # Heuristic
            gb_mu = dp_dz_mu.mean(0, dtype=floatX)
            gV_mu = T.dot(h.T, dp_dz_mu) / B

            dp_dz_sigma = Pi * (T.sqr(T.shape_padright(x_i, 1) - Mu) / T.sqr(Sigma) - 1)
            # dp_dz_sigma = T.grad(lp_current_sum, z_sigma)
            gb_sigma = dp_dz_sigma.mean(0, dtype=floatX)
            gV_sigma = T.dot(h.T, dp_dz_sigma) / B

            dp_dh = T.dot(dp_dz_alpha, V_alpha.T) + T.dot(dp_dz_mu, V_mu.T) + T.dot(dp_dz_sigma, V_sigma.T)  # BxH
            if self.hidden_act == "sigmoid":
                dp_dpot = dp_dh * h * (1 - h)
            elif self.hidden_act == "ReLU":
                dp_dpot = dp_dh * (pot > 0)

            gfact = (dp_dpot * a_i).sum(1).mean(0, dtype=floatX)  # 1

            dP_da_i = dP_da_ip1 + dp_dpot * activation_factor  # BxH
            gW = T.dot(T.shape_padleft(x_im1, 1), dP_da_i).flatten() / B

            return (a_i - T.dot(T.shape_padright(x_im1, 1), T.shape_padleft(w_i, 1)),
                    lp_accum + lp_current,
                    dP_da_i,
                    gW, gb_alpha, gV_alpha, gb_mu, gV_mu, gb_sigma, gV_sigma, gfact)
Example #46
0
File: lds.py Project: ddofer/breze
def smooth(filtered_mean, filtered_cov, transition,
           hidden_noise_mean, hidden_noise_cov):
    step = backward_step(transition, hidden_noise_mean, hidden_noise_cov)

    (g, G), _ = theano.scan(
        step,
        sequences=[filtered_mean[:-1], filtered_cov[:-1]],
        outputs_info=[filtered_mean[-1], filtered_cov[-1]],
        go_backwards=True)

    g = T.concatenate([T.shape_padleft(filtered_mean[-1]), g])
    G = T.concatenate([T.shape_padleft(filtered_cov[-1]), G])

    return g[::-1], G[::-1]
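An illustrative scalar NumPy sketch of the same bookkeeping, using the standard RTS smoother equations (the actual backward_step is defined elsewhere in the source): the last filtered estimate seeds the backward pass, the results are collected, and the whole sequence is reversed back into chronological order.

import numpy as np

def rts_smooth_1d(filtered_mean, filtered_cov, A, Q):
    """A: scalar transition coefficient, Q: transition-noise variance."""
    T_len = len(filtered_mean)
    g = [filtered_mean[-1]]
    G = [filtered_cov[-1]]
    for t in range(T_len - 2, -1, -1):            # backwards over filtered[:-1]
        P_pred = A * filtered_cov[t] * A + Q      # predicted covariance at t+1
        J = filtered_cov[t] * A / P_pred          # smoother gain
        g.append(filtered_mean[t] + J * (g[-1] - A * filtered_mean[t]))
        G.append(filtered_cov[t] + J * (G[-1] - P_pred) * J)
    return np.array(g)[::-1], np.array(G)[::-1]   # reverse, as in the scan above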
Example #47
0
File: ttv.py Project: dfm/exoplanet
 def _warp_times(self, t):
     delta = tt.shape_padleft(t) / tt.shape_padright(self.period, t.ndim)
     delta += tt.shape_padright(self._base_time, t.ndim)
     ind = tt.cast(tt.floor(delta), "int64")
     dt = tt.stack([ttv[tt.clip(ind[i], 0, ttv.shape[0]-1)]
                    for i, ttv in enumerate(self.ttvs)], -1)
     return tt.shape_padright(t) + dt
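An illustrative NumPy version of the same time warping (function name and shapes are assumptions; the original operates on Theano/tt tensors inside exoplanet): each time is converted to a per-planet transit index, which selects that planet's transit-timing offset.

import numpy as np

def warp_times(t, period, base_time, ttvs):
    """t: (n_times,); period, base_time: (n_planets,); ttvs: list of per-planet offset arrays."""
    delta = t[None, :] / period[:, None] + base_time[:, None]   # (n_planets, n_times) phase index
    ind = np.floor(delta).astype(np.int64)
    dt = np.stack([ttv[np.clip(ind[i], 0, len(ttv) - 1)]        # look up each transit's offset
                   for i, ttv in enumerate(ttvs)], axis=-1)     # (n_times, n_planets)
    return t[:, None] + dt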
Example #48
0
  def build_NADE(self,v, W, V, b, c):
    a = T.shape_padright(v) * T.shape_padleft(W)
    a = a.dimshuffle(1, 0, 2)

    c_init = c
    if c.ndim == 1:
        c_init = T.dot(T.ones((v.shape[0], 1)), T.shape_padleft(c))

    (activations, s), updates = theano.scan(lambda V_i, a_i, partial_im1: (a_i + partial_im1, T.dot(V_i, T.nnet.sigmoid(partial_im1.T))), 
                                                   sequences=[V.T, a], outputs_info=[c_init, None])
    s = s.T + b
    y = T.nnet.sigmoid(s)

    cost = -v*T.log(y) - (1-v)*T.log(1-y)
    cost = cost.sum() / v.shape[0]
    return s, y, cost
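An illustrative NumPy re-implementation of the same NADE forward pass (a sketch under assumed shapes v: (B, D), W: (D, H), V: (H, D), b: (D,), c: (H,); it is not the Theano scan graph above): the hidden pre-activation is accumulated one visible unit at a time, and each conditional p(v_d | v_<d) reads out from the running pre-activation before v_d's own contribution is added.

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def nade_forward(v, W, V, b, c):
    B, D = v.shape
    partial = np.tile(c, (B, 1))                               # running pre-activation, starts at c
    s = np.empty((B, D))
    for d in range(D):
        s[:, d] = sigmoid(partial).dot(V[:, d]) + b[d]         # readout for dimension d
        partial = partial + np.outer(v[:, d], W[d])            # then add v_d's contribution
    y = sigmoid(s)
    cost = (-v * np.log(y) - (1 - v) * np.log(1 - y)).sum() / B
    return s, y, cost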
Example #49
0
def _forward_vars(activations, ttargets):
    """Calculate the CTC forward variables: for each example, a matrix of 
       shape (sequence length, target length) where entry (t,u) corresponds 
       to the log-probability of the network predicting the target sequence 
       prefix [0:u] by time t."""

    ttargets = T.cast(ttargets, 'int32')

    activations = T.log(activations)

    # For each example, a matrix of shape (seq len, target len) with values
    # corresponding to activations at each time and sequence position.
    probs = activations[:, T.shape_padleft(T.arange(activations.shape[1])).T, ttargets]

    initial_probs = _initial_probabilities(
        probs.shape[1], 
        ttargets.shape[1])

    skip_allowed = _skip_allowed(ttargets)

    def step(p_curr, p_prev):
        no_change = p_prev
        next_label = helpers.right_shift_rows(p_prev, 1, _LOG_ZERO)
        skip = helpers.right_shift_rows(p_prev + skip_allowed, 2, _LOG_ZERO)

        return p_curr + _log_add_3(no_change, next_label, skip)

    probabilities, _ = theano.scan(
        step,
        sequences=[probs],
        outputs_info=[initial_probs]
    )

    return probabilities
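An illustrative NumPy sketch of the log-space building blocks used by `step` above. The helper names below are assumptions (the source's `helpers` module and `_log_add_3` are not shown): adding three probabilities in log space, and shifting the previous forward variables right by k target positions with a log-zero fill.

import numpy as np

LOG_ZERO = -1e30

def log_add(*xs):
    xs = np.stack(xs)
    m = xs.max(axis=0)
    return m + np.log(np.exp(xs - m).sum(axis=0))

def right_shift_rows(p, k, fill=LOG_ZERO):
    """Shift each row of p right by k positions along the target axis, filling with `fill`."""
    if k == 0:
        return p.copy()
    out = np.full_like(p, fill)
    out[:, k:] = p[:, :-k]
    return out

def ctc_forward_step(p_curr, p_prev, skip_allowed):
    # skip_allowed is passed explicitly here; in the snippet it is captured from the outer scope.
    no_change = p_prev
    next_label = right_shift_rows(p_prev, 1)
    skip = right_shift_rows(p_prev + skip_allowed, 2)
    return p_curr + log_add(no_change, next_label, skip)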
Example #50
0
 def one_hot(self, t, r=None):
     if r is None:
         r = T.max(t) + 1

     ranges = T.shape_padleft(T.arange(r), t.ndim)

     return T.cast(T.eq(ranges, T.shape_padright(t, 1)), dtype=theano.config.floatX)
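The same broadcasting trick in plain NumPy (an illustrative sketch): a padded range is compared against the targets, so each label index becomes a one-hot row.

import numpy as np

def one_hot_np(t, r=None):
    t = np.asarray(t)
    if r is None:
        r = int(t.max()) + 1
    ranges = np.arange(r).reshape((1,) * t.ndim + (r,))   # shape_padleft(arange(r), t.ndim)
    return (ranges == t[..., None]).astype(np.float32)    # shape_padright(t, 1)

# e.g. one_hot_np([0, 2, 1], 3) -> [[1,0,0],[0,0,1],[0,1,0]]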
Example #51
0
        def predict_sequence(x, lengths, return_all=False, return_memory=False):
            if x.ndim > 1:
                outputs_info = [None] + [dict(initial=T.repeat(T.shape_padleft(layer.initial_hidden_state), x.shape[0], axis=0), taps=[-1]) for layer in model.layers if hasattr(layer, 'initial_hidden_state')]
            else:
                outputs_info = [None] + [dict(initial=layer.initial_hidden_state, taps=[-1]) for layer in model.layers if hasattr(layer, 'initial_hidden_state')]
            outputs_info = outputs_info + [None]
            result, updates = theano.scan(step,
                                          sequences = [x.T if x.ndim > 1 else x],
                                          outputs_info = outputs_info)

            if return_all:
                return result
            else:
                res = result[-1].dimshuffle(1, 0, 2) if x.ndim > 1 else result[-1]
                
                price_preds = self.price_model.forward(
                                self.model.layers[-2].postprocess_activation(
                                    result[-2][lengths, T.arange(0, lengths.shape[0])]
                                ), None, []
                            )[-1][:,0] if x.ndim > 1 else \
                            self.price_model.forward(
                                self.model.layers[-2].postprocess_activation(
                                    result[-2][-1]
                            ), None, [])[-1][0]
                # gate values can be obtained by asking for them from the stacked cells
                if return_memory:
                    return result[0], res, price_preds
                else:
                    return res, price_preds
Example #52
0
def blk_tridag_chol(A, B):
    '''
    Compute the Cholesky decomposition of a symmetric, positive-definite
    block-tridiagonal matrix.

    Inputs:
    A - [T x n x n]   tensor, where each A[i,:,:] is the ith block diagonal matrix 
    B - [T-1 x n x n] tensor, where each B[i,:,:] is the ith (upper) 1st block 
        off-diagonal matrix

    Outputs: 
    R - python list with two elements
        * R[0] - [T x n x n] tensor of block diagonal elements of Cholesky decomposition
        * R[1] - [T-1 x n x n] tensor of (lower) 1st block off-diagonal elements of Cholesky

    '''

    # Code for computing the cholesky decomposition of a symmetric block tridiagonal matrix
    def compute_chol(Aip1, Bi, Li, Ci):
        Ci = T.dot(Bi.T, Tla.matrix_inverse(Li).T)
        Dii = Aip1 - T.dot(Ci, Ci.T)
        Lii = Tsla.cholesky(Dii)
        return [Lii,Ci]

    L1 = Tsla.cholesky(A[0])
    C1 = T.zeros_like(B[0])

    # this scan returns the diagonal and off-diagonal blocks of the cholesky decomposition
    mat, updates = theano.scan(fn=compute_chol, sequences=[A[1:], B], outputs_info=[L1,C1])

    mat[0] = T.concatenate([T.shape_padleft(L1), mat[0]])
    return mat
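A NumPy verification sketch of what blk_tridag_chol computes (illustrative: small sizes, dense assembly, made-up test data): the diagonal and off-diagonal Cholesky blocks produced by the recursion above should match the corresponding blocks of np.linalg.cholesky applied to the assembled dense matrix.

import numpy as np

rng = np.random.RandomState(0)
Tb, n = 4, 3
A = np.zeros((Tb, n, n))
B = 0.1 * rng.randn(Tb - 1, n, n)
for i in range(Tb):
    M = rng.randn(n, n)
    A[i] = M.dot(M.T) + n * np.eye(n)              # SPD, diagonally dominant blocks

# Dense assembly of the block-tridiagonal matrix.
full = np.zeros((Tb * n, Tb * n))
for i in range(Tb):
    full[i*n:(i+1)*n, i*n:(i+1)*n] = A[i]
for i in range(Tb - 1):
    full[i*n:(i+1)*n, (i+1)*n:(i+2)*n] = B[i]
    full[(i+1)*n:(i+2)*n, i*n:(i+1)*n] = B[i].T

# Block recursion, mirroring compute_chol above.
L = [np.linalg.cholesky(A[0])]
C = []
for i in range(1, Tb):
    Ci = B[i-1].T.dot(np.linalg.inv(L[i-1]).T)
    L.append(np.linalg.cholesky(A[i] - Ci.dot(Ci.T)))
    C.append(Ci)

dense_L = np.linalg.cholesky(full)
print(np.allclose(dense_L[:n, :n], L[0]))          # first diagonal block matches
print(np.allclose(dense_L[n:2*n, :n], C[0]))       # first off-diagonal block matches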
Example #53
0
def _e_step(psamples, W_list, b_list, n_steps=100, eps=1e-5):
    """
    Performs 'n_steps' of mean-field inference (used to compute positive phase
    statistics)

    Parameters
    ----------
    psamples : array-like object of theano shared variables
        State of each layer of the DBM (during the inference process).
        psamples[0] points to the input
    n_steps :  integer
        Number of iterations of mean-field to perform
    """
    depth = len(psamples)

    new_psamples = [T.unbroadcast(T.shape_padleft(psample)) for psample in psamples]

    # now alternate mean-field inference for even/odd layers
    def mf_iteration(*psamples):
        new_psamples = [p for p in psamples]
        for i in xrange(1, depth, 2):
            new_psamples[i] = hi_given(psamples, i, W_list, b_list)
        for i in xrange(2, depth, 2):
            new_psamples[i] = hi_given(psamples, i, W_list, b_list)

        score = 0.0
        for i in xrange(1, depth):
            score = T.maximum(T.mean(abs(new_psamples[i] - psamples[i])), score)

        return new_psamples, theano.scan_module.until(score < eps)

    new_psamples, updates = scan(mf_iteration, states=new_psamples, n_steps=n_steps)

    return [x[0] for x in new_psamples]
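An illustrative NumPy sketch of the stopping structure above (update_fns is a stand-in for the DBM's hi_given updates, which are not shown here): iterate the layer-wise fixed-point updates and stop once the largest mean absolute change over the non-clamped layers drops below eps, mirroring the theano.scan_module.until condition.

import numpy as np

def mean_field(update_fns, psamples, n_steps=100, eps=1e-5):
    """update_fns[i] maps the current list of layer states to a new state for layer i
    (None for the clamped input layer)."""
    psamples = [p.copy() for p in psamples]
    for _ in range(n_steps):
        new_psamples = [fn(psamples) if fn is not None else p
                        for fn, p in zip(update_fns, psamples)]
        score = max(np.mean(np.abs(n - p))
                    for n, p in zip(new_psamples[1:], psamples[1:]))
        psamples = new_psamples
        if score < eps:
            break
    return psamples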
Example #54
0
 def cost(self):
   """
   :rtype: (theano.Variable | None, dict[theano.Variable,theano.Variable] | None)
   :returns: cost, known_grads
   """
   known_grads = None
   if self.loss == 'ce' or self.loss == 'priori':
     if self.attrs.get("target", "").endswith("[sparse:coo]"):
       assert isinstance(self.y, tuple)
       assert len(self.y) == 3
       from NativeOp import crossentropy_softmax_and_gradient_z_sparse
       y_mask = self.network.j[self.attrs.get("target", "").replace("[sparse:coo]", "[sparse:coo:2:0]")]
       ce, grad_z = crossentropy_softmax_and_gradient_z_sparse(
         self.z, self.index, self.y[0], self.y[1], self.y[2], y_mask)
       return self.norm * T.sum(ce), {self.z: grad_z}
     if self.y_data_flat.type == T.ivector().type:
       # Use crossentropy_softmax_1hot to have a more stable and more optimized gradient calculation.
       # Theano fails to use it automatically; I guess our self.i indexing is too confusing.
       #idx = self.index.flatten().dimshuffle(0,'x').repeat(self.y_m.shape[1],axis=1) # faster than line below
       #nll, pcx = T.nnet.crossentropy_softmax_1hot(x=self.y_m * idx, y_idx=self.y_data_flat * self.index.flatten())
       nll, pcx = T.nnet.crossentropy_softmax_1hot(x=self.y_m[self.i], y_idx=self.y_data_flat[self.i])
       #nll, pcx = T.nnet.crossentropy_softmax_1hot(x=self.y_m, y_idx=self.y_data_flat)
       #nll = -T.log(T.nnet.softmax(self.y_m)[self.i,self.y_data_flat[self.i]])
       #z_c = T.exp(self.z[:,self.y])
       #nll = -T.log(z_c / T.sum(z_c,axis=2,keepdims=True))
       #nll, pcx = T.nnet.crossentropy_softmax_1hot(x=self.y_m, y_idx=self.y_data_flat)
       #nll = T.set_subtensor(nll[self.j], T.constant(0.0))
     else:
       nll = -T.dot(T.log(T.clip(self.p_y_given_x[self.i], 1.e-38, 1.e20)), self.y_data_flat[self.i].T)
     return self.norm * T.sum(nll), known_grads
   elif self.loss == 'entropy':
     h_e = T.exp(self.y_m) #(TB)
     pcx = T.clip((h_e / T.sum(h_e, axis=1, keepdims=True)).reshape((self.index.shape[0],self.index.shape[1],self.attrs['n_out'])), 1.e-6, 1.e6) # TBD
     ee = -T.sum(pcx[self.i] * T.log(pcx[self.i])) # TB
     #nll, pcxs = T.nnet.crossentropy_softmax_1hot(x=self.y_m[self.i], y_idx=self.y[self.i])
     nll, _ = T.nnet.crossentropy_softmax_1hot(x=self.y_m, y_idx=self.y_data_flat) # TB
     ce = nll.reshape(self.index.shape) * self.index # TB
     y = self.y_data_flat.reshape(self.index.shape) * self.index # TB
     f = T.any(T.gt(y,0), axis=0) # B
     return T.sum(f * T.sum(ce, axis=0) + (1-f) * T.sum(ee, axis=0)), known_grads
     #return T.sum(T.switch(T.gt(T.sum(y,axis=0),0), T.sum(ce, axis=0), -T.sum(ee, axis=0))), known_grads
     #return T.switch(T.gt(T.sum(self.y_m[self.i]),0), T.sum(nll), -T.sum(pcx * T.log(pcx))), known_grads
   elif self.loss == 'priori':
     pcx = self.p_y_given_x[self.i, self.y_data_flat[self.i]]
     pcx = T.clip(pcx, 1.e-38, 1.e20)  # For pcx near zero, the gradient will likely explode.
     return -T.sum(T.log(pcx)), known_grads
   elif self.loss == 'sse':
     if self.y_data_flat.dtype.startswith('int'):
       y_f = T.cast(T.reshape(self.y_data_flat, (self.y_data_flat.shape[0] * self.y_data_flat.shape[1]), ndim=1), 'int32')
       y_oh = T.eq(T.shape_padleft(T.arange(self.attrs['n_out']), y_f.ndim), T.shape_padright(y_f, 1))
       return T.mean(T.sqr(self.p_y_given_x[self.i] - y_oh[self.i])), known_grads
     else:
       #return T.sum(T.sum(T.sqr(self.y_m - self.y.reshape(self.y_m.shape)), axis=1)[self.i]), known_grads
       return T.sum(T.sqr(self.y_m[self.i] - self.y_data_flat.reshape(self.y_m.shape)[self.i])), known_grads
       #return T.sum(T.sum(T.sqr(self.z - (self.y.reshape((self.index.shape[0], self.index.shape[1], self.attrs['n_out']))[:self.z.shape[0]])), axis=2).flatten()[self.i]), known_grads
       #y_z = T.set_subtensor(T.zeros((self.index.shape[0],self.index.shape[1],self.attrs['n_out']), dtype='float32')[:self.z.shape[0]], self.z).flatten()
       #return T.sum(T.sqr(y_z[self.i] - self.y[self.i])), known_grads
       #return T.sum(T.sqr(self.y_m - self.y[:self.z.shape[0]*self.index.shape[1]]).flatten()[self.i]), known_grads
   else:
     assert False, "unknown loss: %s" % self.loss
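A NumPy sketch (illustrative) of why the 'ce' branch prefers crossentropy_softmax_1hot: computing the negative log-likelihood through a log-softmax (subtract the row max, then the log-sum-exp) stays finite for large logits, whereas the naive -log(softmax(z)) path overflows.

import numpy as np

def nll_from_logits(z, y):
    """z: (N, n_out) logits, y: (N,) int class ids; returns per-frame NLL."""
    z = z - z.max(axis=1, keepdims=True)                       # shift for stability
    log_p = z - np.log(np.exp(z).sum(axis=1, keepdims=True))   # log-softmax
    return -log_p[np.arange(len(y)), y]

z = np.array([[1000.0, 0.0], [0.0, 1000.0]])
y = np.array([0, 1])
print(nll_from_logits(z, y))   # ~[0, 0]; -log(softmax(z)) would give nan/inf here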
Example #55
0
File: misc.py Project: Wiebke/breze
def project_into_l2_ball(arr, radius=1):
    """Return ``arr`` projected into the L2 ball.

    Parameters
    ----------

    arr : Theano variable
        Array of shape either ``(n, d)`` or ``(d,)``. If the former, all rows
        are projected individually.

    radius : float, optional [default: 1]
        Radius of the L2 ball to project into.

    Returns
    -------

    res : Theano variable
        Projected result of the same shape as ``arr``.
    """
    # Distinguish whether we are given a single or many vectors to work upon.
    batch = arr.ndim == 2
    if not batch:
        arr = T.shape_padleft(arr)

    lengths = T.sqrt((arr ** 2).sum(axis=1)).dimshuffle(0, 'x')
    arr = T.switch(lengths > T.sqrt(radius), arr / lengths * radius, arr)

    if not batch:
        arr = arr[0]

    return arr
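An equivalent NumPy sketch under the usual definition of projection onto an L2 ball of radius `radius` (note that the Theano version above compares the lengths against sqrt(radius)); the function name is illustrative.

import numpy as np

def project_into_l2_ball_np(arr, radius=1.0):
    arr = np.asarray(arr, dtype=float)
    batch = arr.ndim == 2
    if not batch:
        arr = arr[None, :]
    lengths = np.sqrt((arr ** 2).sum(axis=1, keepdims=True))
    scale = np.where(lengths > radius, radius / np.maximum(lengths, 1e-12), 1.0)
    out = arr * scale
    return out if batch else out[0]

# e.g. project_into_l2_ball_np([3.0, 4.0], radius=1.0) -> array([0.6, 0.8])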
Example #56
0
def local_gpualloc(node):
    replace = False
    if node.op == tensor.alloc:
        if node.inputs[0].owner and node.inputs[0].owner.op == host_from_gpu:
            replace = True
        elif all([c != 'output' and c.op == gpu_from_host
                  for c, idx in node.outputs[0].clients]):
            replace = True
        elif all([c != 'output' and c.op == tensor.join and
                  all([i.owner and i.owner.op in [host_from_gpu, tensor.alloc]
                       for i in c.inputs[1:]])
                  for c, idx in node.outputs[0].clients]):
            replace = True
    if replace:
        val = node.inputs[0]
        shp = node.inputs[1:]
        old_out = node.outputs[0]
        val2 = tensor.shape_padleft(val, len(shp) - val.ndim)
        new_out = host_from_gpu(gpu_alloc(val, *shp))
        if new_out.type != old_out.type:
            assert new_out.type.ndim == old_out.type.ndim
            assert new_out.type.dtype == old_out.type.dtype
            for b_old, b_new in zip(old_out.type.broadcastable,
                                    new_out.type.broadcastable):
                assert b_new or (not b_old)
            new_out = tensor.patternbroadcast(new_out, old_out.broadcastable)

        return [new_out]