Beispiel #1
0
    def forward(self, x):
        """Compute order-invariant features for every ordered atom pair.

        ``x`` is unpacked as ``(n_batch, n_atom, n_feature)``. For each pair
        of atoms the two feature vectors are concatenated in both orders,
        each concatenation is passed through ``self.linear_layers`` (with a
        ReLU after every layer), and the two results are added.

        Returns:
            Array of shape ``(n_batch, n_atom * n_atom, self.n_channel)``.
        """
        n_batch, n_atom, n_feature = x.shape
        n_pair = n_atom * n_atom
        grid_shape = (n_batch, n_atom, n_atom, n_feature)
        flat_shape = (n_batch, n_pair, n_feature)

        # Row ``i * n_atom + j`` of atom_repeat holds the features of atom j.
        atom_repeat = functions.reshape(
            functions.broadcast_to(
                functions.reshape(x, (n_batch, 1, n_atom, n_feature)),
                grid_shape),
            flat_shape)

        # Row ``i * n_atom + j`` of atom_tile holds the features of atom i.
        atom_tile = functions.reshape(
            functions.broadcast_to(
                functions.reshape(x, (n_batch, n_atom, 1, n_feature)),
                grid_shape),
            flat_shape)

        def embed_pairs(first, second):
            # Concatenate the two atoms' features and run the shared layers.
            pair = functions.concat((first, second), axis=2)
            pair = functions.reshape(pair, (n_batch * n_pair, n_feature * 2))
            for layer in self.linear_layers:
                pair = functions.relu(layer(pair))
            return functions.reshape(pair, (n_batch, n_pair, self.n_channel))

        pair_x0 = embed_pairs(atom_tile, atom_repeat)
        pair_x1 = embed_pairs(atom_repeat, atom_tile)
        return pair_x0 + pair_x1
Beispiel #2
0
    def __call__(self, h, dist):
        """Weight atom features by a learned filter of the distances.

        Args:
            h (numpy.ndarray): axis 0 represents minibatch index,
                axis 1 represents atom_index and axis2 represents
                feature dimension.
            dist (numpy.ndarray): axis 0 represents minibatch index,
                axis 1 and 2 represent distance between atoms.

        Returns:
            The filtered features summed over axis 1, of shape
            ``(minibatch, atom, hidden_dim)``.

        Raises:
            ValueError: If ``h.shape[2]`` differs from ``self.hidden_dim``.

        """
        n_batch, n_atom, n_ch = h.shape
        if n_ch != self.hidden_dim:
            raise ValueError('h.shape[2] {} and hidden_dim {} must be same!'
                             .format(n_ch, self.hidden_dim))
        pair_shape = (n_batch, n_atom, n_atom)

        # Radial basis centres: 0, r, 2r, ... with r = radius_resolution.
        centers = self.xp.arange(
            self.num_rbf).astype('f') * self.radius_resolution

        # Expand every scalar distance into num_rbf Gaussian responses.
        expanded = functions.broadcast_to(
            functions.reshape(dist, pair_shape + (1,)),
            pair_shape + (self.num_rbf,))
        rbf = functions.exp(- self.gamma * (expanded - centers) ** 2)

        # Two dense layers with softplus turn the responses into a filter.
        weight = functions.reshape(rbf, (-1, self.num_rbf))
        weight = functions.softplus(self.dense1(weight))
        weight = functions.softplus(self.dense2(weight))
        weight = functions.reshape(weight, pair_shape + (self.hidden_dim,))

        feats = functions.broadcast_to(
            functions.reshape(h, (n_batch, n_atom, 1, self.hidden_dim)),
            pair_shape + (self.hidden_dim,))
        return functions.sum(feats * weight, axis=1)
Beispiel #3
0
 def __call__(self, x):
     """Normalize ``x`` over its last axis, then apply gain and bias.

     The mean and (biased) variance are taken over the last axis; the
     result is scaled by ``self.g`` and shifted by ``self.b`` along axis 2.
     """
     # Chainer needs the statistics broadcast explicitly to x's shape.
     mean = F.broadcast_to(F.mean(x, -1, keepdims=True), x.shape)
     variance = F.broadcast_to(
         F.mean((x - mean) ** 2, -1, keepdims=True), x.shape)
     normalized = (x - mean) / F.sqrt(variance + self.e)
     scaled = F.scale(normalized, self.g, axis=2)
     return F.bias(scaled, self.b, axis=2)
def cosine_similarity(x, y, eps=1e-6):
    """Return batched pairwise cosine similarities between ``x`` and ``y``.

    Both inputs are unpacked as 3-dimensional; entry ``[b, i, j]`` of the
    result is the dot product of ``x[b, i]`` and ``y[b, j]`` divided by the
    square root of the product of their squared norms plus ``eps``.
    """
    batch, n_x, _ = x.data.shape
    _, n_y, _ = y.data.shape
    out_shape = (batch, n_x, n_y)

    dots = F.batch_matmul(x, y, transb=True)
    x_sq = F.broadcast_to(
        F.reshape(F.sum(x * x, axis=2), (batch, n_x, 1)), out_shape)
    y_sq = F.broadcast_to(
        F.reshape(F.sum(y * y, axis=2), (batch, 1, n_y)), out_shape)
    # exp(log(v) / 2) is the square root of v.
    dots /= F.exp(F.log(x_sq * y_sq + eps) / 2)
    return dots
Beispiel #5
0
    def norm_by_freq(self, freq):
        """Standardize ``self.W`` with ``freq``-weighted statistics.

        The weighted mean and variance are summed over axis 0 and broadcast
        back to the shape of ``self.W``; ``1e-6`` is added to the variance
        before the square root.
        """
        embeddings = self.W
        weighted_mean = F.broadcast_to(
            F.sum(freq * embeddings, axis=0, keepdims=True),
            embeddings.shape)
        weighted_var = F.broadcast_to(
            F.sum(freq * ((embeddings - weighted_mean) ** 2),
                  axis=0, keepdims=True),
            embeddings.shape)
        return (embeddings - weighted_mean) / F.sqrt(1e-6 + weighted_var)
    def compute_dists(self, obs):
        """Normalize ``obs`` with the running statistics and query the policy.

        The running mean is subtracted and the result divided by the running
        std plus ``1e-8``. When ``self.clip`` is set, the normalized values
        are clipped to ``[-self.clip, self.clip]`` before being handed to
        ``self.policy.compute_dists``.
        """
        running_mean = Variable(self.running_stat.mean.astype(np.float32))
        running_std = Variable(self.running_stat.std.astype(np.float32))

        centered = obs - F.broadcast_to(running_mean, obs.shape)
        normalized = centered / (F.broadcast_to(running_std, obs.shape) + 1e-8)

        if self.clip is not None:
            normalized = F.clip(normalized, -self.clip, self.clip)
        return self.policy.compute_dists(normalized)
Beispiel #7
0
def get_normalized_vector(d, xp=None):
    """Scale each sample of ``d`` to (approximately) unit L2 norm.

    All axes except axis 0 are reduced. ``d`` is first divided by its
    maximum absolute value (plus ``1e-12``) and then by the square root of
    its sum of squares plus ``1e-6``.

    Args:
        d: Array to normalize. With an array module the division is done
            with ``/=``, so an ndarray argument is modified in place.
        xp: Array module (e.g. numpy) used for the computation. When
            ``None`` the Chainer functions in ``F`` are used instead.

    Returns:
        The normalized ``d``.
    """
    reduce_axes = tuple(range(1, len(d.shape)))
    if xp is None:
        max_abs = 1e-12 + F.max(F.absolute(d), reduce_axes, keepdims=True)
        d /= F.broadcast_to(max_abs, d.shape)
        norm = F.sqrt(1e-6 + F.sum(d ** 2, reduce_axes, keepdims=True))
        d /= F.broadcast_to(norm, d.shape)
        return d

    d /= (1e-12 + xp.max(xp.abs(d), reduce_axes, keepdims=True))
    d /= xp.sqrt(1e-6 + xp.sum(d ** 2, reduce_axes, keepdims=True))
    return d
Beispiel #8
0
 def query(self, u):
     """Read from the memory with query ``u`` and return the updated query.

     ``self.TA`` and ``self.TC`` are evaluated at the indices
     ``size - 1, ..., 0`` (``size`` being ``self.m.shape[1]``) and added to
     ``self.m`` and ``self.c``. The softmax of ``(m + tm) @ u`` weights the
     rows of ``c + tc``; the weighted sum is added to ``u``.
     """
     xp = cuda.get_array_module(u)
     n_slots = self.m.shape[1]
     reversed_inds = xp.arange(n_slots - 1, -1, -1, dtype=numpy.int32)
     temporal_m = F.broadcast_to(self.TA(reversed_inds), self.m.shape)
     temporal_c = F.broadcast_to(self.TC(reversed_inds), self.c.shape)
     attention = F.softmax(F.batch_matmul(self.m + temporal_m, u))
     readout = F.batch_matmul(
         F.swapaxes(self.c + temporal_c, 2, 1), attention)
     return F.squeeze(readout, -1) + u
Beispiel #9
0
 def __call__(self, x):
     """Applies the linear layer.

     The weight actually used is ``g * W / norm``, where ``norm`` is the
     square root of ``F.batch_l2_norm_squared(W)`` and both ``g`` and
     ``norm`` are broadcast along axis 1 to the shape of ``W``.

     Args:
         x (~chainer.Variable): Batch of input vectors.
     Returns:
         ~chainer.Variable: Output of the linear layer.
     """
     w_shape = self.W.data.shape
     row_norm = F.batch_l2_norm_squared(self.W) ** 0.5
     row_norm = F.broadcast_to(F.expand_dims(row_norm, 1), w_shape)
     gain = F.broadcast_to(F.expand_dims(self.g, 1), w_shape)
     return F.linear(x, gain * self.W / row_norm, self.b)
Beispiel #10
0
 def query(self, u):
     """Read from the memory with query ``u`` and return the updated query.

     ``self.TA`` / ``self.TC`` are evaluated at the reversed index range
     ``size - 1, ..., 0`` and broadcast over the batch before being added
     to ``self.m`` / ``self.c``. The attention-weighted sum of ``c + tc``
     is reshaped to ``(batch, m.shape[2])`` and added to ``u``.
     """
     memory, content = self.m, self.c
     batch, size = memory.data.shape[:2]
     inds = chainer.Variable(xp.arange(size, dtype=numpy.int32)[::-1])
     temporal_m = self.TA(inds)
     temporal_c = self.TC(inds)
     temporal_m = F.broadcast_to(
         temporal_m, (batch,) + temporal_m.data.shape)
     temporal_c = F.broadcast_to(
         temporal_c, (batch,) + temporal_c.data.shape)
     attention = F.softmax(F.batch_matmul(memory + temporal_m, u))
     readout = F.batch_matmul(
         F.swapaxes(content + temporal_c, 2, 1), attention)
     readout = F.reshape(readout, (batch, memory.data.shape[2]))
     return readout + u
def perspective(vertices, angle=30.):
    """Apply a perspective division to a batch of vertices.

    Args:
        vertices: 3-dimensional array; the last axis is indexed as
            ``(x, y, z)``.
        angle: Angle in degrees (converted to radians with the constant
            3.1416). A Python number is wrapped into a float32 Variable;
            anything else is used as given.

    Returns:
        Vertices whose x and y are divided by ``z * tan(angle)`` and whose
        z is unchanged.
    """
    assert (vertices.ndim == 3)
    xp = chainer.cuda.get_array_module(vertices)
    if isinstance(angle, (float, int)):
        angle = chainer.Variable(xp.array(angle, 'float32'))
    radians = angle / 180. * 3.1416
    radians = cf.broadcast_to(radians[None], (vertices.shape[0],))

    width = cf.broadcast_to(cf.tan(radians)[:, None], vertices.shape[:2])
    depth = vertices[:, :, 2]
    proj_x = vertices[:, :, 0] / depth / width
    proj_y = vertices[:, :, 1] / depth / width
    return cf.concat(
        (proj_x[:, :, None], proj_y[:, :, None], depth[:, :, None]), axis=2)
Beispiel #12
0
    def __call__(self, x, y):
        """Evaluate a Gaussian kernel between the rows of ``x`` and ``y``.

        Both inputs are multiplied by the same scalar drawn with
        ``F.gaussian`` (mean 0, log-variance ``e``) and broadcast to
        ``x.shape``, so ``y`` must be compatible with that shape.

        Parameters
        -----------------
        x: Variable
            Feature of unlabeled samples.
        y: Variable
            Feature of unlabeled samples.

        Returns
        -----------------
        Variable
            Matrix whose ``[i, j]`` entry is
            ``exp(-|x_g[i]|^2 + 2 * x_g[i] . y_g[j] - |y_g[j]|^2)``.
        """

        g = F.broadcast_to(
            F.gaussian(
                np.array([0], dtype=np.float32),
                np.array([np.exp(1)], dtype=np.float32)), x.shape)

        x_g = x * g
        y_g = y * g

        x_g_norm = F.sum(x_g**2, axis=1)
        y_g_norm = F.sum(y_g**2, axis=1)
        # Entry [i, j] is the inner product of x_g[i] and y_g[j].
        x_g_y_g = F.linear(x_g, y_g)

        # The x norms index rows (i) and the y norms index columns (j), so
        # x is expanded to a column vector and y to a row vector before
        # broadcasting against the (n_x, n_y) inner-product matrix.
        x_g_norm, x_g_y_g, y_g_norm = F.broadcast(
            F.expand_dims(x_g_norm, 1),
            x_g_y_g,
            F.expand_dims(y_g_norm, 0))
        return F.exp(- x_g_norm + 2 * x_g_y_g - y_g_norm)
Beispiel #13
0
 def __call__(self, x, context):
     """Return ``self.loss_func`` over the flattened context embeddings.

     The embeddings come from the module-level ``model.embed``; their first
     two axes are merged into one, and ``x`` is broadcast to those two axes
     and flattened the same way.
     """
     embedded = model.embed(context)
     shape = embedded.data.shape
     n_rows = shape[0] * shape[1]
     targets = F.broadcast_to(x, (shape[0], shape[1]))
     flat_embedded = F.reshape(embedded, (n_rows, shape[2]))
     flat_targets = F.reshape(targets, (n_rows,))
     return self.loss_func(flat_embedded, flat_targets)
Beispiel #14
0
def ordinal_loss(y, mask):
    """Compute a masked ordinal loss over axis 1 of ``y``.

    ``y`` is unpacked as ``(b, c, n)``. The per-position maximum over
    axis 1 is subtracted first. Lower- and upper-triangular 1x1
    convolution kernels then accumulate ``exp`` and ``log`` terms across
    the ``c`` axis. The masked sum is divided by ``b``.

    NOTE(review): relies on ``Variable.volatile``, which presumably ties
    this to an old Chainer release -- confirm before upgrading.
    """
    xp = cuda.get_array_module(y.data)
    volatile = y.volatile
    batch, n_class, length = y.data.shape

    shifted = y - F.broadcast_to(
        F.max(y, axis=1, keepdims=True), y.data.shape)
    class_sum = F.broadcast_to(
        F.expand_dims(F.sum(shifted, axis=1), 1), shifted.data.shape)

    lower = np.tri(n_class, dtype=np.float32)
    upper = lower.T
    kernel_shape = (n_class, n_class, 1, 1)
    w_lower = Variable(
        xp.asarray(lower.reshape(*kernel_shape)), volatile=volatile)
    w_upper = Variable(
        xp.asarray(upper.reshape(*kernel_shape)), volatile=volatile)

    exp_y = F.exp(F.expand_dims(shifted, -1))
    acc_lower = F.convolution_2d(exp_y, w_lower)
    acc_lower = F.convolution_2d(F.log(acc_lower), w_lower)
    acc_upper = F.convolution_2d(exp_y, w_upper)
    acc_upper = F.convolution_2d(F.log(acc_upper), w_upper)

    total = F.reshape(acc_lower + acc_upper, (batch, n_class, length))
    return F.sum((total - class_sum - shifted) * mask) / batch
Beispiel #15
0
 def __call__(self, x, contexts):
     """Compute, report and return the loss for ``x`` and ``contexts``.

     Every entry of ``x`` is repeated once per context position so that it
     lines up with the flattened ``(batch_size * n_context, n_units)``
     embedding matrix passed to ``self.loss_func``.
     """
     embedded = self.embed(contexts)
     batch_size, n_context, n_units = embedded.shape
     n_pairs = batch_size * n_context
     targets = F.broadcast_to(x[:, None], (batch_size, n_context))
     flat_embedded = F.reshape(embedded, (n_pairs, n_units))
     flat_targets = F.reshape(targets, (n_pairs,))
     loss = self.loss_func(flat_embedded, flat_targets)
     reporter.report({'loss': loss}, self)
     return loss
Beispiel #16
0
 def __call__(self, x, context):
     """Compute, report and return the loss for ``x`` and ``context``.

     The first two axes of the context embeddings are merged, and ``x`` is
     repeated along a new second axis and flattened to match.
     """
     embedded = self.embed(context)
     dims = embedded.data.shape
     n_rows = dims[0] * dims[1]
     targets = F.broadcast_to(x[:, None], (dims[0], dims[1]))
     flat_embedded = F.reshape(embedded, (n_rows, dims[2]))
     flat_targets = F.reshape(targets, (n_rows,))
     loss = self.loss_func(flat_embedded, flat_targets)
     reporter.report({'loss': loss}, self)
     return loss
Beispiel #17
0
def _bias(x, y, axis=1):
    """Add ``y`` to ``x``, aligning ``y`` with ``x``'s axes from ``axis``.

    ``y`` is reshaped with leading and trailing singleton axes and then
    broadcast to the shape of ``x``. The shape of ``y`` must equal the
    slice of ``x``'s shape starting at ``axis`` (checked with ``assert``).
    """
    x_shape = x.data.shape
    y_shape = y.data.shape
    end = axis + len(y_shape)
    assert x_shape[axis:end] == y_shape
    n_trailing = len(x_shape) - end
    padded_shape = (1,) * axis + tuple(y_shape) + (1,) * n_trailing
    padded = functions.reshape(y, padded_shape)
    return x + functions.broadcast_to(padded, x_shape)
Beispiel #18
0
    def _calc_distmat(self, h):
        """Return the matrix of squared Euclidean distances between rows.

        Uses ``|a|^2 - 2 a.b + |b|^2`` with the squared norms broadcast to
        a ``(bs, bs)`` matrix and its transpose.
        """
        n_rows = h.shape[0]
        sq_norms = F.sum(h**2, axis=1)
        # col_norms[i, j] is the squared norm of row j.
        col_norms = F.broadcast_to(sq_norms, (n_rows, n_rows))
        row_norms = F.transpose(col_norms)
        gram = F.linear(h, h)
        return (row_norms - 2*gram + col_norms)
Beispiel #19
0
 def __call__(self, x, context):
     """Compute, report and return the loss for ``x`` and ``context``.

     The first two axes of the context embeddings are merged, and ``x`` is
     repeated along a new second axis and flattened to match.
     """
     embedded = self.embed(context)
     dims = embedded.shape
     n_rows = dims[0] * dims[1]
     targets = F.reshape(
         F.broadcast_to(x[:, None], (dims[0], dims[1])), (n_rows,))
     flat_embedded = F.reshape(embedded, (n_rows, dims[2]))
     # Open question from the original author: should the loss be divided
     # by the batch size?
     loss = self.loss_func(flat_embedded, targets)
     reporter.report({'loss': loss}, self)
     return loss
Beispiel #20
0
 def __call__(self, embeded_x, m_prev, h_prev, x):
     """Advance the LSTM by one step, skipping ignored positions.

     Rows whose label in ``x`` equals ``IGNORE_LABEL`` keep their previous
     cell (``m_prev``) and hidden (``h_prev``) state; all other rows take
     the freshly computed state.

     Returns:
         Tuple ``(m, h)`` of the new cell and hidden states.
     """
     batch_size = embeded_x.shape[0]
     m_new, h_new = F.lstm(m_prev, self.W(embeded_x) + self.U(h_prev))
     # True where the step's output should replace the previous state.
     is_valid = F.expand_dims(x.data != IGNORE_LABEL, -1)
     update = F.broadcast_to(is_valid, (batch_size, self.hidden_size))
     return F.where(update, m_new, m_prev), F.where(update, h_new, h_prev)
Beispiel #21
0
    def check_backward(self, data, grad):
        """Compare the analytic gradient of broadcast_to with a numerical one.

        ``data`` is broadcast to ``self.out_shape``, ``grad`` is used as the
        upstream gradient, and the back-propagated gradient is checked
        against ``gradient_check.numerical_grad``.
        """
        x = chainer.Variable(data)
        broadcasted = functions.broadcast_to(x, self.out_shape)
        func = broadcasted.creator

        def run_forward():
            return func.forward((data,))

        broadcasted.grad = grad
        broadcasted.backward()
        numerical, = gradient_check.numerical_grad(
            run_forward, (data,), (broadcasted.grad,))
        gradient_check.assert_allclose(numerical, x.grad)
Beispiel #22
0
def grad_unbias_hook(optimizer):
    """Subtract a scalar offset from the gradient of every parameter.

    For each parameter of ``optimizer.target`` the sum of all gradient
    entries divided by the size of the gradient's first axis is subtracted
    from every entry, and the result is written back to ``p.grad``.

    Parameters whose gradient is ``None`` (no gradient has been computed
    for them) are left untouched instead of raising ``AttributeError``.

    Args:
        optimizer: Object exposing ``target.params()``.
    """
    for p in optimizer.target.params():
        grad_data = p.grad
        if grad_data is None:
            # Nothing to unbias for a parameter without a gradient.
            continue
        shape = grad_data.shape
        bs = shape[0]

        grad = Variable(grad_data)
        mean_grad = F.broadcast_to(F.sum(grad) / bs, shape)
        grad_unbias = grad - mean_grad

        p.grad = grad_unbias.data
    def __call__(self, x, context):
        """Compute, report and return the loss using the character RNN.

        ``x`` is repeated along a new second axis to the first two
        dimensions of ``context`` and flattened; ``context`` is flattened to
        one dimension and encoded with ``self.rnn.charRNN``.
        """
        n_rows, n_cols = context.shape[0], context.shape[1]
        n_total = n_rows * n_cols

        targets = F.broadcast_to(x[:, None], (n_rows, n_cols))
        targets = F.reshape(targets, (n_total,))

        flat_context = context.reshape(n_total)
        encoded = self.rnn.charRNN(flat_context)

        loss = self.loss_func(encoded, targets)
        reporter.report({'loss': loss}, self)
        return loss
Beispiel #24
0
 def setUp(self):
     """Create random inputs, the operator under test and its reference.

     ``x1`` has shape ``(batch_size, m, k)``, ``x2`` has shape ``(k, n)``
     and ``gy`` has shape ``(batch_size, m, n)``. The operator multiplies
     ``x1`` with ``x2`` broadcast over the batch axis; the reference answer
     is the per-sample ``numpy.dot``.
     """
     def uniform32(low, high, shape):
         return numpy.random.uniform(low, high, shape).astype(numpy.float32)

     # Keep the draw order (x1, x2, gy) so seeded runs stay reproducible.
     self.x1 = uniform32(.5, 1, (batch_size, m, k))
     self.x2 = uniform32(.5, 1, (k, n))
     self.gy = uniform32(-1, 1, (batch_size, m, n))
     self.op = lambda x, y: F.batch_matmul(
         x, F.broadcast_to(F.expand_dims(y, 0), (batch_size, k, n)))
     self.forward_answer = numpy.array(
         [numpy.dot(self.x1[i], self.x2)
          for i in six.moves.range(batch_size)])
Beispiel #25
0
def look_at(vertices, eye, at=None, up=None):
    """
    "Look at" transformation of vertices.

    Args:
        vertices: 3-dimensional array indexed as [batch, vertex, xyz].
        eye: Viewpoint; a list/tuple, or an array with one or two axes.
        at: Point being looked at; defaults to ``(0, 0, 0)``.
        up: Up direction; defaults to ``(0, 1, 0)``.

    Returns:
        The vertices translated by ``-eye`` and rotated into the frame
        spanned by the x, y and z axes derived from ``eye``, ``at``, ``up``.
    """
    assert (vertices.ndim == 3)

    xp = chainer.cuda.get_array_module(vertices)
    batch_size = vertices.shape[0]
    if at is None:
        at = xp.array([0, 0, 0], 'float32')
    if up is None:
        up = xp.array([0, 1, 0], 'float32')
    if isinstance(eye, (list, tuple)):
        eye = xp.array(eye, 'float32')

    def per_batch(vec):
        # Repeat a single vector once per batch entry; leave others as is.
        if vec.ndim == 1:
            return cf.tile(vec[None, :], (batch_size, 1))
        return vec

    eye = per_batch(eye)
    at = per_batch(at)
    up = per_batch(up)

    # create new axes
    z_axis = cf.normalize(at - eye)
    x_axis = cf.normalize(neural_renderer.cross(up, z_axis))
    y_axis = cf.normalize(neural_renderer.cross(z_axis, x_axis))

    # create rotation matrix: [bs, 3, 3]
    rotation = cf.concat(
        (x_axis[:, None, :], y_axis[:, None, :], z_axis[:, None, :]), axis=1)
    if rotation.shape[0] != vertices.shape[0]:
        rotation = cf.broadcast_to(rotation, (vertices.shape[0], 3, 3))

    # apply: translate by -eye, then rotate; shapes stay [bs, nv, 3]
    if vertices.shape != eye.shape:
        eye = cf.broadcast_to(eye[:, None, :], vertices.shape)
    return cf.matmul(vertices - eye, rotation, transb=True)
Beispiel #26
0
    def __call__(self, x):
        """Compute, report and return the negative beta-weighted objective.

        ``self.k`` samples are drawn from the encoder distribution. The
        reconstruction term is the mean log-probability of ``x`` (repeated
        ``k`` times) under the decoder; the penalty is the mean KL
        divergence between the encoder distribution and the prior.
        """
        q_z = self.encoder(x)
        z = q_z.sample(self.k)
        p_x = self.decoder(z)
        p_z = self.prior()

        x_repeated = F.broadcast_to(x[None, :], (self.k,) + x.shape)
        reconstr = F.mean(p_x.log_prob(x_repeated))
        kl_penalty = F.mean(chainer.kl_divergence(q_z, p_z))
        loss = - (reconstr - self.beta * kl_penalty)

        for key, value in (('loss', loss),
                           ('reconstr', reconstr),
                           ('kl_penalty', kl_penalty)):
            reporter.report({key: value}, self)
        return loss
Beispiel #27
0
 def __call__(self, y, m_prev, s_prev, h_forward, h_backword, enable, disable_value):
     """Decode one step with attention.

     ``m_prev`` is the previous LSTM memory cell and ``s_prev`` the previous
     hidden output. Rows whose label in ``y`` equals ``IGNORE_LABEL`` keep
     their previous cell and hidden state.

     Returns:
         Tuple ``(output, m, s)``: the projected output and the new cell
         and hidden states.
     """
     # Attention context computed from the encoder states.
     context = self._attention(
         h_forward, h_backword, s_prev, enable, disable_value)

     # One LSTM step on the embedded input, previous state and context.
     embeded_y = self.E(y)
     batch_size = y.shape[0]
     lstm_in = self.W(embeded_y) + self.U(s_prev) + self.C(context)
     m_new, s_new = F.lstm(m_prev, lstm_in)

     # True where the new state should replace the previous one.
     update = F.broadcast_to(
         F.expand_dims(y.data != IGNORE_LABEL, -1),
         (batch_size, self.hidden_size))
     m = F.where(update, m_new, m_prev)
     s = F.where(update, s_new, s_prev)

     pre_output = self.U_o(s) + self.V_o(embeded_y) + self.C_o(context)
     return self.W_o(pre_output), m, s
Beispiel #28
0
    def __call__(self, x):
        """Normalize input and scale it.

        The input is normalized along axis 1 with ``F.normalize`` and then
        multiplied by ``self.scale`` broadcast over the two trailing axes.

        Args:
            x (chainer.Variable): A variable holding 4-dimensional array.
                Its :obj:`dtype` is :obj:`numpy.float32`.

        Returns:
            chainer.Variable:
            The shape and :obj:`dtype` are same as those of input.
        """
        normalized = F.normalize(x, eps=self.eps, axis=1)
        channel_scale = self.scale[:, np.newaxis, np.newaxis]
        return normalized * F.broadcast_to(channel_scale, normalized.shape)
    def __call__(self, x, context):
        """Compute, report and return the loss for ``x`` and ``context``.

        ``x`` is repeated along a new second axis to the first two
        dimensions of ``context`` and flattened. The context representation
        depends on the module-level ``args.subword``:

        * ``'rnn'``: the flattened context is encoded by
          ``self.rnn.charRNN``.
        * ``'none'``: the context is looked up in ``self.embed`` and its
          first two axes are merged.

        Raises:
            ValueError: If ``args.subword`` is neither ``'rnn'`` nor
                ``'none'`` (previously this surfaced as an unbound-local
                error on ``e``).
        """
        x = F.broadcast_to(x[:, None], (context.shape[0], context.shape[1]))
        x = F.reshape(x, (context.shape[0] * context.shape[1],))

        if args.subword == 'rnn':
            context = context.reshape((context.shape[0] * context.shape[1]))
            e = self.rnn.charRNN(context)
        elif args.subword == 'none':
            e = self.embed(context)
            e = F.reshape(e, (e.shape[0] * e.shape[1], e.shape[2]))
        else:
            raise ValueError(
                "unsupported args.subword {!r}; expected 'rnn' or 'none'"
                .format(args.subword))

        loss = self.loss_func(e, x)
        reporter.report({'loss': loss}, self)
        return loss
Beispiel #30
0
    def metric(self, model, images, labels):
        """Compute, report and return a neighbourhood-based metric.

        For every embedding, the negative log-softmax of the negated
        squared distances to all embeddings is summed over the samples
        that share its label. The per-sample sums are accumulated and
        reported under ``'metric'``.
        """
        xp = cupy.get_array_module(images)
        batchsize = len(images)
        embeddings = model(images)

        embeddings = F.reshape(embeddings, ((batchsize, -1)))
        n_dim = embeddings.shape[1]
        total = 0
        for embedding, label in zip(embeddings, labels):
            anchor = F.broadcast_to(embedding, (batchsize, n_dim))
            sq_dists = F.sum((embeddings - anchor)**2, axis=1)
            neg_log_probs = -F.log_softmax(
                F.expand_dims(-sq_dists, axis=0))[0]
            same_label = xp.where(labels == label)
            total += F.sum(neg_log_probs[same_label])
        chainer.report({'metric': total}, model)
        return total
Beispiel #31
0
  def _context(self, p, fb_mat, fbe_mat):
    """Return the attention-weighted sum of ``fb_mat`` for state ``p``.

    ``fb_mat`` is unpacked as ``(batch, srclen, _)``. ``fbe_mat`` is added
    to the projected state, so it is presumably the pre-projected source
    matrix of shape ``[batch * srclen, atten]`` -- confirm with the caller.
    """
    batch_size, source_length, _ = fb_mat.data.shape
    n_rows = batch_size * source_length

    # pe_mat: the projected state repeated per source position,
    # shape = [batch * srclen, atten]
    projected = F.expand_dims(self.p_e(p), 1)
    pe_mat = F.reshape(
        F.broadcast_to(
            projected, [batch_size, source_length, self.atten_size]),
        [n_rows, self.atten_size])
    e_mat = F.tanh(fbe_mat + pe_mat)

    # a_mat: attention weights, shape = [batch, srclen]
    scores = F.reshape(self.e_a(e_mat), [batch_size, source_length])
    a_mat = F.softmax(scores)

    # q: shape = [batch, 2 * hidden]
    weighted = F.batch_matmul(a_mat, fb_mat, transa=True)
    return F.reshape(weighted, [batch_size, 2 * self.hidden_size])
Beispiel #32
0
def maximum_entropy_mellowmax(values, omega=1., beta_min=-10, beta_max=10):
    """Maximum entropy mellowmax policy function.

    This function provides a categorical distribution whose expectation matches
    the one of mellowmax function while maximizing its entropy.

    See: http://arxiv.org/abs/1612.05628

    Args:
        values (Variable or ndarray):
            Input values. Mellowmax is taken along the second axis.
        omega (float):
            Parameter of mellowmax.
        beta_min (float):
            Minimum value of beta, used in Brent's algorithm.
        beta_max (float):
            Maximum value of beta, used in Brent's algorithm.
    Returns:
        outputs (Variable)
    """
    xp = chainer.cuda.get_array_module(values)
    mm = mellowmax(values, axis=1)

    # Advantage: Q - mellowmax(Q), copied to the CPU because the root
    # finding below uses Brent's algorithm from scipy.
    advantages = values - F.broadcast_to(F.expand_dims(mm, 1), values.shape)
    advantages = chainer.cuda.to_cpu(advantages.data)
    betas = np.empty(mm.shape, dtype=np.float32)

    def root_target(beta, adv):
        # Beta is the root of this function.
        return np.sum(np.exp(beta * adv) * adv)

    for idx in np.ndindex(mm.shape):
        # Re-insert a full slice for the axis mellowmax reduced over.
        adv = advantages[idx[:1] + (slice(None), ) + idx[1:]]
        try:
            betas[idx] = scipy.optimize.brentq(
                root_target, a=beta_min, b=beta_max, args=(adv, ))
        except ValueError:
            # Best-effort fallback kept from the original: use beta = 0
            # when brentq rejects the bracket.
            betas[idx] = 0

    return F.softmax(xp.expand_dims(xp.asarray(betas), 1) * values)
Beispiel #33
0
    def __call__(self, x, contexts):
        """Compute, report and return the loss for ``x`` and ``contexts``.

        ``x`` is cast to int32 and repeated once per context position so
        that it lines up with the flattened
        ``(batch_size * n_context, n_units)`` embedding matrix.
        """
        targets = x.astype(np.int32)
        embedded = self.embed(contexts)
        batch_size, n_context, n_units = embedded.shape
        n_pairs = batch_size * n_context

        targets = F.broadcast_to(targets[:, None], (batch_size, n_context))
        flat_embedded = F.reshape(embedded, (n_pairs, n_units))
        flat_targets = F.reshape(targets, (n_pairs, ))

        loss = self.loss_func(flat_embedded, flat_targets)
        reporter.report({'loss': loss}, self)
        return loss
Beispiel #34
0
 def compute_r(self, x, v):
     """Encode ``x`` together with the vector ``v`` into a representation.

     ``x`` passes through a first residual convolution stage. ``v`` is
     reshaped to ``(batch, channels, 1, 1)``, broadcast over the spatial
     axes of that stage's output and concatenated along the channel axis
     before the second residual stage.
     """
     stem = cf.relu(self.conv1_1(x))
     skip = cf.relu(self.conv1_res(stem))
     out = cf.relu(self.conv1_2(stem))
     out = cf.relu(self.conv1_3(out)) + skip

     # Tile v spatially so it can be concatenated with the feature map.
     v = cf.reshape(v, (v.shape[0], v.shape[1], 1, 1))
     tiled_shape = (out.shape[0], v.shape[1]) + out.shape[2:]
     v = cf.broadcast_to(v, shape=tiled_shape)

     merged = cf.concat((out, v), axis=1)
     skip = cf.relu(self.conv2_res(merged))
     out = cf.relu(self.conv2_1(merged))
     out = cf.relu(self.conv2_2(out)) + skip
     return self.conv2_3(out)
Beispiel #35
0
        def compute_context(previous_state):
            """Return the attention context for ``previous_state``.

            Uses ``encoder_factor``, ``hxs``, ``batch_size``, ``max_length``
            and ``encoder_output_size`` from the enclosing scope.
            """
            full_shape = (batch_size, max_length, self.n_attention_units)

            # Repeat the projected decoder state for every position.
            decoder_factor = F.broadcast_to(
                F.reshape(self.s(previous_state),
                          (batch_size, 1, self.n_attention_units)),
                full_shape)

            energy = F.reshape(
                F.tanh(encoder_factor + decoder_factor),
                (batch_size * max_length, self.n_attention_units))
            scores = F.reshape(self.o(energy), (batch_size, max_length))
            attention = F.softmax(scores)

            weighted = F.batch_matmul(attention, hxs, transa=True)
            return F.reshape(weighted, (batch_size, encoder_output_size))
Beispiel #36
0
    def metric(self, model, images, labels):
        """Compute, report and return a label-free neighbourhood metric.

        For every embedding, the softmax of the negated squared distances
        to all embeddings is combined with its own negative log
        (``-log p * p``) and summed. The per-sample sums are accumulated
        and reported under ``'metric'``. ``labels`` is not used.
        """
        batchsize = len(images)
        embeddings = model(images)

        embeddings = F.reshape(embeddings, ((batchsize, -1)))
        n_dim = embeddings.shape[1]
        total = 0
        for embedding in embeddings:
            anchor = F.broadcast_to(embedding, (batchsize, n_dim))
            sq_dists = F.sum((embeddings - anchor)**2, axis=1)
            neg_log_probs = -F.log_softmax(
                F.expand_dims(-sq_dists, axis=0))[0]
            probs = F.softmax(F.expand_dims(-sq_dists, axis=0))[0]
            total += F.sum(neg_log_probs * probs)
        chainer.report({'metric': total}, model)
        return total
Beispiel #37
0
 def forward(self, xs, h, c, mask):
     """Run the hand-written LSTM over ``xs`` and return the last hidden state.

     Args:
         xs: Sequence of per-sample inputs; padded with ``F.pad_sequence``.
         h: Initial hidden state.
         c: Initial cell state.
         mask: Indexed as ``mask[:, time]``; each entry multiplies the newly
             computed state, and one minus it multiplies the previous state.
             Presumably 1 for valid steps and 0 for padding -- confirm
             with the caller.

     Returns:
         The hidden state after ``self.sequence_length`` steps.
     """
     n_hidden = self.num_hidden
     state_shape = (self.batch_size, n_hidden)
     inputs = F.pad_sequence(xs)
     for time in range(self.sequence_length):
         x = inputs[:, time]
         gate = self.l(F.concat((x, h), axis=1))
         # Gate layout along axis 1: input, output, forget, candidate.
         i = F.sigmoid(gate[:, 0:n_hidden])
         o = F.sigmoid(gate[:, n_hidden:n_hidden * 2])
         f = F.sigmoid(gate[:, n_hidden * 2:n_hidden * 3])
         candidate = F.tanh(gate[:, n_hidden * 3:n_hidden * 4])
         nc = f * c + i * candidate
         nh = o * F.tanh(nc)

         pmask = F.reshape(mask[:, time], (self.batch_size, ))
         pmask = F.broadcast_to(F.expand_dims(pmask, axis=1), state_shape)
         nmask = 1.0 - pmask
         h = nh * pmask + h * nmask
         # Carry the cell state forward under the same mask as h; the
         # original never updated c, so every step reused the initial cell.
         c = nc * pmask + c * nmask
     return h
Beispiel #38
0
    def __call__(self, x):
        """Compute, report and return the objective ``kl - logp``.

        ``self.num_zsamples`` latent samples are drawn from q(z|x); the
        log-likelihood term is averaged over them. The per-instance
        objective is also used to report a sample variance ``obj_var``.

        Side effects:
            Sets ``self.kl``, ``self.logp`` and ``self.obj``.
        """
        batchsize = x.shape[0]

        # Compute q(z|x)
        qmu, qln_var = self.encode(x)

        kl_inst = gaussian_kl_divergence_inst(qmu, qln_var)
        logp_inst = None
        self.kl = F.sum(kl_inst) / batchsize
        self.logp = 0
        # ``range`` instead of the Python-2-only ``xrange``.
        for _ in range(self.num_zsamples):
            # z ~ q(z|x)
            z = F.gaussian(qmu, qln_var)

            # Compute p(x|z)
            pxz = self.decode(z)
            logpxz = pxz(x)
            if logp_inst is None:
                logp_inst = logpxz
            else:
                logp_inst += logpxz

            # Compute objective
            batchsize = logpxz.shape[0]
            self.logp += F.sum(logpxz) / batchsize

        # Compute the sample variance of the per-instance objective
        logp_inst /= self.num_zsamples
        obj_inst = kl_inst - logp_inst
        obj_inst_mean = F.sum(obj_inst) / batchsize
        obj_c = obj_inst - F.broadcast_to(obj_inst_mean, obj_inst.shape)
        obj_var = F.sum(obj_c * obj_c) / (batchsize - 1)

        self.logp /= self.num_zsamples
        self.obj = self.kl - self.logp

        reporter.report(
            {
                'obj': self.obj,
                'obj_var': obj_var,
                'kl': self.kl,
                'logp': self.logp
            }, self)

        return self.obj
Beispiel #39
0
    def predict(self, x):
        """Classify documents with an attention-pooled bidirectional LSTM.

        Returns:
            In training mode (``chainer.config.train``) the output of
            ``self.l3`` on the dropped-out pooled vector. Otherwise a tuple
            of the ``self.l3`` output and the attention weights of the
            first document as a raw array.
        """
        batchsize = len(x)

        xs = [F.dropout(self.embed(Variable(doc)), ratio=0.5) for doc in x]
        # Per the original notes: the first value is the final hidden state
        # of the bi-LSTM (2, batchsize, mid_size); ys holds the hidden
        # vector of every step (batchsize, lim, mid_size*2).
        _, _, ys = self.bi_lstm(hx=None, cx=None, xs=xs)

        ys = [F.dropout(hidden, ratio=0.3) for hidden in ys]
        stacked = F.concat(ys, axis=0)  # (batchsize*lim, mid_size*2)
        scores = F.dropout(self.l_attn(stacked),
                           ratio=0.25)  # (batchsize*lim, 1)

        # Cut the stacked scores back into one piece per document.
        boundaries = np.cumsum([len(doc) for doc in xs])[:-1]
        per_doc_scores = F.split_axis(
            scores, boundaries, axis=0)  # (batchsize, lim, 1)
        # Padding with a large negative score keeps padded steps out of
        # the softmax.
        padded_scores = F.pad_sequence(per_doc_scores, padding=-1024.0)
        attn_softmax = F.softmax(padded_scores,
                                 axis=1)  # (batchsize, lim, 1)
        ys_pad = F.pad_sequence(ys, length=None,
                                padding=0.0)  # (batchsize, lim, mid_size*2)

        # Align the shapes so ys and attn_softmax can be multiplied.
        flat_hidden = F.reshape(
            ys_pad, (-1, ys_pad.shape[-1]))  # (batchsize*lim, mid_size*2)
        flat_weights = F.broadcast_to(
            F.reshape(attn_softmax, (-1, attn_softmax.shape[-1])),
            flat_hidden.shape)  # (batchsize*lim, mid_size*2)

        # Hidden states times attention weights.
        weighted = flat_hidden * flat_weights  # (batchsize*lim, mid_size*2)
        weighted = F.reshape(
            weighted,
            (batchsize, -1, weighted.shape[-1]))  # (batchsize, lim, mid_size*2)

        # Weighted sum of the hidden states: (batchsize, mid_size*2)
        result = F.sum(weighted, axis=1)

        if chainer.config.train:
            return self.l3(F.dropout(result, ratio=0.2))

        attn_list = F.transpose(attn_softmax[0])[0]
        return self.l3(result), attn_list.data
Beispiel #40
0
 def evaluate_with_quantile_thresholds(taus):
     """Evaluate action values at the quantile thresholds ``taus``.

     ``taus`` must have shape ``(batch_size, n_taus)``. Uses ``self``,
     ``psi_x``, ``batch_size`` and ``hidden_size`` from the enclosing
     scope.

     Returns:
         ``QuantileDiscreteActionValue`` wrapping an array of shape
         ``(batch_size, n_taus, n_actions)``.
     """
     assert taus.ndim == 2
     assert taus.shape[0] == batch_size
     n_taus = taus.shape[1]

     phi_taus = self.phi(taus)
     assert phi_taus.ndim == 3
     assert phi_taus.shape == (batch_size, n_taus, hidden_size)

     # Combine the state embedding with every quantile embedding.
     psi_x_b = F.broadcast_to(
         F.expand_dims(psi_x, axis=1), phi_taus.shape)
     merged = F.reshape(psi_x_b * phi_taus, (-1, hidden_size))
     assert merged.shape == (batch_size * n_taus, hidden_size)

     out = self.f(merged)
     assert out.ndim == 2
     assert out.shape[0] == batch_size * n_taus
     n_actions = out.shape[-1]
     out = F.reshape(out, (batch_size, n_taus, n_actions))
     return QuantileDiscreteActionValue(out)
    def __call__(self, a_list):
        """Return the attention-weighted sum of the vectors in ``a_list``.

        Each vector gets the score ``exp(we(tanh(pw(a))))``; the scores are
        normalized by their sum and stored in ``self.empha``. The result has
        shape ``(1, self.hidden_size)``.
        """
        scores = []
        self.empha = []
        score_total = Variable(np.array([[0]], dtype='float32'))
        for a in a_list:
            score = functions.exp(self.we(functions.tanh(self.pw(a))))
            scores.append(score)
            score_total += score

        context = Variable(np.zeros((1, self.hidden_size), dtype='float32'))
        for a, score in zip(a_list, scores):
            weight = score / score_total
            self.empha.append(weight)
            context += a * functions.broadcast_to(
                weight, (1, self.hidden_size))
        return context
    def forward_onestep(self, prev_h_g, prev_h_e, prev_c_e, x, v, r):
        """Advance the convolutional LSTM by one step.

        ``v`` gets two trailing singleton axes and is broadcast over the
        spatial axes of ``prev_h_e``; ``x`` passes through two ReLU
        convolutions. Both are concatenated with the previous hidden states
        and ``r`` along axis 1 to form the LSTM input.

        Returns:
            Tuple ``(next_h, next_c)``.
        """
        tiled_shape = (prev_h_e.shape[0], v.shape[1]) + prev_h_e.shape[2:]
        v = cf.broadcast_to(cf.reshape(v, v.shape + (1, 1)), shape=tiled_shape)

        x = cf.relu(self.params.conv_x_2(cf.relu(self.params.conv_x_1(x))))

        lstm_in = cf.concat((prev_h_e, prev_h_g, x, v, r), axis=1)
        forget_gate = cf.sigmoid(self.params.lstm_f(lstm_in))
        input_gate = cf.sigmoid(self.params.lstm_i(lstm_in))
        candidate = cf.tanh(self.params.lstm_tanh(lstm_in))
        next_c = forget_gate * prev_c_e + input_gate * candidate
        output_gate = cf.sigmoid(self.params.lstm_o(lstm_in))
        next_h = output_gate * cf.tanh(next_c)
        return next_h, next_c
Beispiel #43
0
 def compute_discriminative_loss(self, fact_vectors):
     """Return a hinge loss between the pooled scores of all word pairs.

     ``fact_vectors`` is max-pooled over axis 0 (the original notes give
     its shape as ``(n_view_steps, batch_size, voc_size)``). For every
     word ``i`` the hinge ``max(0, 1 - (pool - pool[:, i]))`` is summed
     over all entries, ``voc_size`` is subtracted, and the total is divided
     by ``(voc_size - 1) * voc_size``.

     The hinge is computed with ``F.relu`` on the Variable itself so that
     gradients reach ``fact_vectors``; the previous ``F.where`` call was
     fed ``.data`` and detached the loss from the graph.
     """
     reason_pool = F.max(fact_vectors, axis=0)
     batch_size, voc_size = reason_pool.data.shape
     loss = 0
     for i_word in range(voc_size):
         one_word_score = \
             F.get_item(reason_pool, [range(batch_size), i_word])
         one_word_score = F.expand_dims(one_word_score, axis=1)
         broaded_one_word_score = \
             F.broadcast_to(one_word_score, reason_pool.data.shape)
         real_value_of_hinge = 1 - (reason_pool - broaded_one_word_score)
         # max(0, v): same values as the old where(v < 0, 0, v), but
         # differentiable with respect to reason_pool.
         hinge_loss = F.relu(real_value_of_hinge)
         loss += F.sum(hinge_loss) - voc_size
     loss /= (voc_size - 1) * voc_size
     return loss
Beispiel #44
0
    def __call__(self, x):
        """Apply minibatch discrimination followed by the output layer.

        Dropout (always in train mode) is applied when ``self.dr`` is set,
        and the spatial axes are summed when ``self.gap`` is set. The
        minibatch-discrimination part follows
        https://github.com/pfnet-research/chainer-gan-lib/blob/master/minibatch_discrimination/net.py
        """
        if self.dr:
            with chainer.using_config('train', True):
                x = F.dropout(x, self.dr)
        if self.gap:
            x = F.sum(x, axis=(2,3))
        n_samples = x.shape[0]
        n_kernels = self.B * self.C

        feature = F.reshape(F.leaky_relu(x), (n_samples, -1))
        projected = F.reshape(self.md(feature), (n_samples, n_kernels, 1))
        # tiled[i, :, j] is sample i's projection; mirrored[i, :, j] is
        # sample j's.
        tiled = F.broadcast_to(projected, (n_samples, n_kernels, n_samples))
        mirrored = F.transpose(tiled, (2, 1, 0))
        diff = F.absolute(
            F.reshape(tiled - mirrored,
                      (n_samples, self.B, self.C, n_samples)))
        # Sum exp(-L1 distance) over all samples; the -1 removes the
        # sample's distance to itself.
        closeness = F.sum(F.exp(-F.sum(diff, axis=2)), axis=2) - 1

        return self.l(F.concat([feature, closeness]))
Beispiel #45
0
    def forward(self, u):
        """Add a globally pooled context vector to every position of ``u``.

        ``u`` is unpacked as ``(B, C, H, W)``. The output of
        ``self.context`` is reshaped to ``(B, H * W, 1)`` and passed through
        ``F.softmax`` to weight the flattened features. The pooled vector
        goes through ``down``, ``ln``, ReLU and ``up`` and is broadcast back
        over the spatial axes.
        """
        B, C, H, W = u.shape

        flat_features = u.reshape((B, C, H * W))
        weights = F.softmax(self.context(u).reshape((B, H * W, 1)))
        pooled = F.batch_matmul(flat_features, weights)

        transformed = self.up(F.relu(self.ln(self.down(pooled))))

        # Broadcast to (H, W, B, C), then move batch and channel first.
        spread = F.broadcast_to(transformed, (H, W, B, C))
        spread = spread.transpose((2, 3, 0, 1))

        return u + spread
Beispiel #46
0
def lighting(faces,
             textures,
             intensity_ambient=0.5,
             intensity_directional=0.5,
             color_ambient=(1, 1, 1),
             color_directional=(1, 1, 1),
             direction=(0, 1, 0)):
    """Shade per-face textures with an ambient and a directional light.

    Args:
        faces: Triangle vertices. Reshaped internally to ``(bs * nf, 3, 3)``,
            so the expected layout is ``(bs, nf, 3, 3)``.
        textures: Six-axis texture tensor whose first two axes are
            ``(bs, nf)`` and whose last axis holds the 3 colour channels; the
            light is broadcast over the three axes in between.
        intensity_ambient: Scalar weight of the ambient term (skipped if 0).
        intensity_directional: Scalar weight of the directional term
            (skipped if 0).
        color_ambient: Ambient colour, either one RGB triple (tuple, list or
            1-D array) shared by the batch or a ``(bs, 3)`` array.
        color_directional: Directional colour, same conventions as
            ``color_ambient``.
        direction: Light direction, either one 3-vector shared by the batch
            or a ``(bs, 3)`` array.

    Returns:
        ``textures`` multiplied element-wise by the computed light.
    """
    xp = chainer.cuda.get_array_module(faces)
    bs, nf = faces.shape[:2]

    # arguments: turn python sequences into float32 arrays
    if isinstance(color_ambient, (tuple, list)):
        color_ambient = xp.array(color_ambient, 'float32')
    if isinstance(color_directional, (tuple, list)):
        color_directional = xp.array(color_directional, 'float32')
    if isinstance(direction, (tuple, list)):
        direction = xp.array(direction, 'float32')

    # arguments: expand un-batched values to one row per batch element
    if color_ambient.ndim == 1:
        color_ambient = cf.broadcast_to(color_ambient[None, :], (bs, 3))
    if color_directional.ndim == 1:
        color_directional = cf.broadcast_to(color_directional[None, :],
                                            (bs, 3))
    # This check must not depend on the shape of color_directional: a 1-D
    # direction has to be expanded even when the colour is already batched.
    if direction.ndim == 1:
        direction = cf.broadcast_to(direction[None, :], (bs, 3))

    # create light
    light = xp.zeros((bs, nf, 3), 'float32')

    # ambient light
    if intensity_ambient != 0:
        light = light + intensity_ambient * cf.broadcast_to(
            color_ambient[:, None, :], light.shape)

    # directional light
    if intensity_directional != 0:
        faces = faces.reshape((bs * nf, 3, 3))
        # Face normal from the two edges that meet at vertex 1.
        v10 = faces[:, 0] - faces[:, 1]
        v12 = faces[:, 2] - faces[:, 1]
        normals = cf.normalize(cross(v10, v12))
        normals = normals.reshape((bs, nf, 3))

        if direction.ndim == 2:
            direction = cf.broadcast_to(direction[:, None, :], normals.shape)
        # Clamp negative cosines to zero (relu).
        cos = cf.relu(cf.sum(normals * direction, axis=2))
        light = (light + intensity_directional * cfmath.mul(
            *cf.broadcast(color_directional[:, None, :], cos[:, :, None])))

    # apply
    light = cf.broadcast_to(light[:, :, None, None, None, :], textures.shape)
    textures = textures * light
    return textures
Beispiel #47
0
    def __call__(self, x):
        """Run the conv stack and append minibatch-discrimination features.

        Args:
            x: Input batch; axis 0 is the minibatch axis.

        Returns:
            ``self.lout(h)`` where ``h`` is the flattened conv output
            concatenated with the similarity features. When
            ``self.use_feature_matching`` is truthy the tuple
            ``(h, self.lout(h))`` is returned instead.
        """
        batch = x.shape[0]

        h = x
        stages = ((self.conv1, self.bn1),
                  (self.conv2, self.bn2),
                  (self.conv3, self.bn3))
        for conv, bn in stages:
            h = F.leaky_relu(bn(conv(h)), slope=0.2)
        h = F.reshape(h, (batch, -1))

        # Compare each sample's (B, C) kernel responses with every other
        # sample's by tiling along a trailing batch axis and swapping axes.
        kernels = F.reshape(self.l_hidden(h), (batch, self.B, self.C, 1))
        tiled = F.broadcast_to(kernels, (batch, self.B, self.C, batch))
        swapped = F.transpose(tiled, (3, 1, 2, 0))
        l1_dist = F.sum(F.absolute(tiled - swapped), axis=2)
        # exp(-0) == 1 for a sample compared with itself; remove that term.
        closeness = F.sum(F.exp(-l1_dist), axis=2) - 1

        h = F.concat([h, closeness])

        if self.use_feature_matching:
            return h, self.lout(h)
        return self.lout(h)
def generate_2dmeshgrid(H, W, N, xp=np):
    """Generate 2d meshgrid.

    Builds the homogeneous pixel coordinates ``(x, y, 1)`` of an ``H`` x ``W``
    image, flattened row by row, and repeats them ``N`` times along a new
    leading axis. The result is cached in the module-level ``meshgrid`` and
    reused while the arguments stay the same.

    Args:
        H: Number of rows.
        W: Number of columns.
        N: Size of the leading (batch) axis of the result.
        xp: Array module used to build the grid (``numpy`` by default).

    Returns:
        Array: Shape is (N, 3, H*W)
    """
    global meshgrid
    # Key the cache on every argument. H * W alone cannot tell a 2x6 grid
    # from a 3x4 one, and it says nothing about N or the array module.
    cache_key = (H, W, N, xp)
    cached_key = getattr(generate_2dmeshgrid, '_cache_key', None)
    if meshgrid is None or cached_key != cache_key:
        ys, xs = xp.meshgrid(xp.arange(0, H, dtype=np.float32),
                             xp.arange(0, W, dtype=np.float32),
                             indexing='ij',
                             copy=False)
        ones = xp.ones((1, H, W), dtype=np.float32)
        grid = xp.concatenate([xs[None], ys[None], ones], axis=0)
        meshgrid = F.broadcast_to(grid.reshape(1, 3, H * W), (N, 3, H * W))
        generate_2dmeshgrid._cache_key = cache_key
    return meshgrid
Beispiel #49
0
def rescale_adj(adj):
    """Normalize adjacency matrix.

    Divides ``adj`` by its sum over axes 1 and 2, so that activations stay
    on a similar scale irrespective of the number of neighbors. Entries
    whose sum is zero are divided by one, i.e. left unchanged.

    Args:
        adj (:class:`chainer.Variable`, or :class:`numpy.ndarray`
            or :class:`cupy.ndarray`): 4-D adjacency matrix.

    Returns:
        :class:`chainer.Variable`: normalized adjacency matrix
    """
    xp = cuda.get_array_module(adj)
    degree = functions.sum(adj, axis=(1, 2))

    # Substitute 1 wherever the count is zero to avoid dividing by zero.
    ones = xp.ones(degree.shape, dtype=xp.float32)
    has_neighbors = degree.data != 0
    scale = 1 / functions.where(has_neighbors, degree, ones)

    # Re-insert the two summed axes so the scale lines up with adj.
    scale = functions.broadcast_to(scale[:, None, None, :], adj.shape)
    return adj * scale
Beispiel #50
0
 def __call__(self, x):
     """Run the three-resolution tail / body / head network on ``x``.

     The tail builds three feature maps, each after a 2x max-pooling of the
     previous one. The body walks back up, merging each map with the
     upsampled coarser one. The head walks down again, merging each map
     with the pooled finer one.

     Returns:
         ``self.fin(self.h3(...))`` computed from the coarsest head output.
     """
     # tail
     fine = self.t0(self.f0(x))
     mid = self.t1(F.max_pooling_2d(fine, 2))
     coarse = self.t2(F.max_pooling_2d(mid, 2))
     summary = self.t3(coarse)

     # body
     coarse = self.b2(
         F.concat([coarse, F.broadcast_to(summary, coarse.shape)]))
     mid = self.b1(F.concat([mid, upsample(coarse, 2)]))
     fine = self.b0(F.concat([fine, upsample(mid, 2)]))

     # head
     mid = self.h1(F.concat([mid, F.max_pooling_2d(fine, 2)]))
     coarse = self.h2(F.concat([coarse, F.max_pooling_2d(mid, 2)]))

     return self.fin(self.h3(coarse))
Beispiel #51
0
    def get_elbo(self, x, k=None, with_ll=False):
        """Estimate the ELBO of ``x`` from ``k`` posterior samples.

        Args:
            x: Input batch.
            k: Number of samples drawn from the encoder distribution; any
                falsy value (``None`` or ``0``) falls back to ``self.k``.
            with_ll: When true, also return
                ``mean(logsumexp(elbo_k, axis=0) - log(k))``.

        Returns:
            The mean ELBO, or ``(elbo, log_likelihood)`` if ``with_ll``.
        """
        n_samples = k or self.k

        posterior = self.encoder(x)
        latents = posterior.sample(n_samples)
        likelihood = self.decoder(latents, n_batch_axes=2)
        prior = self.prior()

        # Repeat x along a new leading sample axis to match the latents.
        tiled_x = F.broadcast_to(x[None, :], (n_samples, ) + x.shape)
        log_px = likelihood.log_prob(tiled_x)
        log_ratio = posterior.log_prob(latents) - prior.log_prob(latents)

        per_sample = log_px - log_ratio
        elbo = F.mean(per_sample)
        if not with_ll:
            return elbo

        log_likelihood = F.mean(
            F.logsumexp(per_sample, axis=0) - numpy.log(n_samples))
        return elbo, log_likelihood
Beispiel #52
0
    def __call__(self, x):
        """Classify token-id sequences with label-embedding attention.

        Args:
            x: 2-D array of token ids, unpacked as
                ``(batch_size, sentence_len)``; positions equal to ``PAD``
                are masked out of the attention.

        Returns:
            The output of ``self.l2``.
        """
        xp = self.xp
        batch_size, sentence_len = x.shape
        n_class, n_embed = self.c.shape

        embedded = self.embed(x)
        padded = self.pad_sequence(embedded)

        # Eq. (2): cosine similarity between every word and every label.
        labels = F.broadcast_to(self.c, (batch_size, n_class, n_embed))
        dots = F.matmul(padded, labels, transb=True)
        word_norm = xp.expand_dims(
            xp.linalg.norm(padded.data, axis=2), axis=2)
        label_norm = xp.expand_dims(
            xp.linalg.norm(labels.data, axis=2), axis=2)
        # Outer product of the norms; the epsilon avoids zero division.
        norm_prod = xp.matmul(
            word_norm, xp.transpose(label_norm, (0, 2, 1))) + 1e-10
        sim = dots / norm_prod

        # Eq. (3): score each n-gram window of similarities.
        window = self.n_window * 2 + 1
        ngrams = self.make_ngram(sim)
        ngrams = F.reshape(
            ngrams, (batch_size * sentence_len * n_class, window))
        scores = F.relu(self.attention(ngrams))
        scores = F.reshape(scores, (batch_size, sentence_len, n_class))

        # Eq. (4): best label score per position.
        best = F.max(scores, axis=2)

        # Eq. (5): push PAD positions far down so softmax gives them ~0.
        pad_penalty = (x == PAD).astype(xp.float32) * -1024.0
        beta = F.softmax(best + pad_penalty)
        beta = F.expand_dims(beta, axis=1)

        # Eq. (6): attention-weighted sum of the word embeddings.
        doc = F.reshape(F.matmul(beta, embedded), (batch_size, n_embed))

        # f_2
        hidden = F.dropout(F.relu(self.l1(doc)), ratio=.8)
        return self.l2(hidden)
    def update_core(self):
        """Run one training iteration and dump a progress image.

        Fetches the next batch from the 'main' iterator, generates an output
        with ``self.func``, computes the loss against the batch, reports the
        loss and the camera parameters, saves the first generated image via
        ``save_progress_image``, then back-propagates and updates every
        optimizer. ``self.count`` is incremented at the end.
        """
        train_iter = self.get_iterator('main')

        batch = train_iter.next()
        t_data = self.converter(batch, self.device)
        B = t_data.shape[0]
        # self.func is called with the batch size only.
        y_data = self.func(B)
        B, C, H, W = t_data.shape[:4]
        # Expand the generated output to the target's first four dimensions.
        y_data = F.broadcast_to(y_data, (B, C, H, W))
        loss = compute_loss(y_data, t_data)

        pos_model = self.models['position']
        dir_model = self.models['direction']

        reporter.report({
            'main/loss': loss,
            'camera_position/x': pos_model.camera_position[0],
            'camera_position/y': pos_model.camera_position[1],
            'camera_position/z': pos_model.camera_position[2],
            'camera_direction/x': dir_model.camera_zaxis[0],
            'camera_direction/y': dir_model.camera_zaxis[1],
            'camera_direction/z': dir_model.camera_zaxis[2]
        })

        # Work on the raw array from here on; `loss` keeps the graph alive.
        y_data = y_data.data
        if self.device >= 0:
            # Non-negative device id: copy the array off the device first.
            y_data = y_data.get()
            cuda.get_device_from_id(self.device).synchronize()

        # First image of the batch: move axis 0 last, clip to [0, 1],
        # scale to uint8 and convert RGB -> BGR before saving.
        img = y_data[0]
        img = np.transpose(img, (1, 2, 0))
        img = np.clip(img, 0, 1)
        img = (img * 255).astype(np.uint8)
        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
        save_progress_image(self.odir, self.count, img)

        # Clear gradients on every optimizer target before backprop.
        for o in self.optimizers.values():
            o.target.cleargrads()

        loss.backward()

        for o in self.optimizers.values():
            o.update()
        self.count += 1
Beispiel #54
0
    def attention(self, hOut, encOut, args):  # TODO: this function is still a bit messy
        """Compute the attentional hidden state from ``hOut`` and ``encOut``.

        Args:
            hOut: Decoder hidden state, [batch, Dim].
            encOut: Encoder outputs, [batch, max(enc_sentlen), Dim].
            args: Options object; reads ``args.attention`` (0 disables
                attention) and ``args.hiddenDim``.

        Returns:
            ``hOut`` unchanged when attention is disabled, otherwise
            ``tanh(self.attnOut([hOut; context]))``.
        """
        # Without attention, hand the decoder state back untouched.
        if args.attention == 0:
            return hOut

        # Prepare the target side: project, then tile along the source axis
        # so that it has the same shape as encOut.
        query = F.expand_dims(self.attnIn(hOut), axis=1)  # [batch, 1, Dim]
        tiled_shape = (len(query), len(encOut[0]), args.hiddenDim)
        query = F.broadcast_to(query, tiled_shape)  # [batch, max(enc_sentlen), Dim]

        # Dot-product score per source position.
        scores = F.sum(encOut * query, axis=2)  # [batch, sentlen]

        # No mask is applied here, so padded positions still receive some
        # probability mass.
        weights = F.softmax(scores)  # [batch, max(enc_sentlen)]
        weights = F.expand_dims(weights, axis=1)  # [batch, 1, max(enc_sentlen)]

        # Weighted sum of the encoder outputs: the encoder context vector.
        summed = F.batch_matmul(weights, encOut)  # [batch, 1, Dim]
        context_shape = (len(encOut), len(encOut[0][0]))
        context = F.reshape(summed, context_shape)  # [batch, Dim]

        merged = F.concat((hOut, context))  # [batch, Dim + Dim]
        return F.tanh(self.attnOut(merged))  # [batch, Dim], activated
Beispiel #55
0
    def __call__(self, inputs):
        """Predict future positions and compute the MSE loss.

        Args:
            inputs: Raw batch, passed to ``self._prepare_input``.

        Returns:
            ``(loss, pred_y, None)``. ``loss`` is the mean squared error
            between the prediction and ``pos_y``. ``pred_y`` is the
            prediction as a CPU array, with ``offset_x`` added and then
            rescaled by ``self._std`` and shifted by ``self._mean``.
        """
        (pos_x, pos_y, offset_x, _ego_x, _ego_y,
         pose_x, _pose_y) = self._prepare_input(inputs)
        # The unpacking doubles as a check that pos_x has exactly three axes.
        _batch_size, _past_len, _ = pos_x.shape

        encoded = F.concat(
            (self.pos_encoder(pos_x), self.pose_encoder(pose_x)), axis=1)
        decoded = self.pos_decoder(self.inter(encoded))

        pred = F.swapaxes(self.last(decoded), 1, 2)
        # Keep only as many steps along axis 1 as the target provides.
        pred = pred[:, :pos_y.shape[1], :]
        loss = F.mean_squared_error(pred, pos_y)

        # The loss above is taken before the offset is added back.
        shifted = pred + F.broadcast_to(F.expand_dims(offset_x, 1),
                                        pred.shape)
        restored = cuda.to_cpu(shifted.data) * self._std + self._mean
        return loss, restored, None
    def log_propensity_independent(self, x, action):
        """Return log-softmax scores of the predictions ordered by ``action``.

        When ``self.k > 0`` and ``action`` has fewer columns than the
        prediction, the given actions are completed with the result of
        ``inverse_select_items_per_row`` before the predictions are gathered.
        When ``self.k > 0`` only the first ``self.k`` columns of the result
        are returned.

        Args:
            x: Input passed to ``self._predict``.
            action: Per-row action indices.
        """
        xp = cuda.get_array_module(action)
        scores = self._predict(x)
        n_actions = scores.shape[1]

        ordering = action
        if self.k > 0 and action.shape[1] < n_actions:
            # Every action index, repeated for each row of the prediction.
            every_action = F.broadcast_to(
                xp.arange(n_actions, dtype=action.data.dtype), scores.shape)
            remaining = inverse_select_items_per_row(every_action, action)
            chosen = select_items_per_row(every_action, action)
            ordering = F.concat((chosen, remaining), axis=1)

        log_probs = F.log_softmax(select_items_per_row(scores, ordering))
        if self.k > 0:
            log_probs = log_probs[:, :self.k]
        return log_probs
Beispiel #57
0
 def __call__(self, x, y=None):
     """Run the block stack, optionally conditioned on ``y``.

     Args:
         x: Input batch.
         y: Optional conditioning input. When given, ``self.l_y(y)`` is
             tiled over the spatial axes of the block-3 output and
             concatenated to it along axis 1.

     Returns:
         The output of ``self.l7`` applied to the activations summed over
         axes 2 and 3.
     """
     h = self.block3(self.block2(self.block1(x)))

     if y is not None:
         emb = self.l_y(y)
         n_rows, n_feat = emb.shape[0], emb.shape[1]
         height, width = h.shape[2], h.shape[3]
         emb = F.reshape(emb, (n_rows, n_feat, 1, 1))
         emb = F.broadcast_to(emb, (n_rows, n_feat, height, width))
         h = F.concat([h, emb], axis=1)

     h = self.block6(self.block5(self.block4(h)))
     h = self.activation(h)
     pooled = F.sum(h, axis=(2, 3))  # Global pooling
     return self.l7(pooled)
Beispiel #58
0
    def __call__(self, v, h, label):
        """Apply one gated step to the vertical and horizontal inputs.

        Args:
            v: Vertical input.
            h: Horizontal input.
            label: Conditioning label, embedded with ``self.label`` and added
                as a bias to both inputs.

        Returns:
            Tuple ``(v, h)`` of the updated vertical and horizontal outputs.
        """
        vert_t = self.vertical_conv_t(v)
        vert_s = self.vertical_conv_s(v)
        # The vertical-to-horizontal links use the conv outputs before the
        # label bias is added.
        link_t = self.v_to_h_conv_t(vert_t)
        link_s = self.v_to_h_conv_s(vert_s)

        # The label bias is added to both the vertical and horizontal convs.
        # Only the shape of vert_t is taken, as it should be the same for all.
        bias = self.label(label)
        bias = F.expand_dims(F.expand_dims(bias, -1), -1)
        bias = F.broadcast_to(bias, vert_t.shape)

        vert_t = vert_t + bias
        vert_s = vert_s + bias
        v_out = F.tanh(vert_t) * F.sigmoid(vert_s)

        horiz_t = self.horizontal_conv_t(h)
        horiz_s = self.horizontal_conv_s(h)
        horiz_t = horiz_t + link_t + bias
        horiz_s = horiz_s + link_s + bias
        h_out = self.horizontal_output(F.tanh(horiz_t) * F.sigmoid(horiz_s))

        return v_out, h_out
Beispiel #59
0
    def get_initial_logits(self, mb_size=None):
        """Advance the decoder one step from its initial state.

        Args:
            mb_size: Minibatch size; defaults to ``self.mb_size``, which must
                then not be ``None``.

        Returns:
            Tuple ``(new_states, logits, attn)`` from
            ``self.advance_one_step``.
        """
        if mb_size is None:
            mb_size = self.mb_size
        assert mb_size is not None

        # Start from the multi-target signal if there is one, otherwise from
        # the GRU's own initial states.
        signal = self.multi_target_signal
        if signal is None:
            start_states = self.decoder_chain.gru.get_initial_states(mb_size)
        else:
            start_states = signal

        # One copy of the BOS embedding per minibatch element.
        bos = F.broadcast_to(self.decoder_chain.bos_embeding,
                             (mb_size, self.decoder_chain.Eo))

        new_states, logits, attn = self.advance_one_step(start_states, bos)
        return new_states, logits, attn
Beispiel #60
0
        def compute_ctxt(previous_state, prev_word_embedding=None):
            """Compute the attention context for the current decoder state.

            Args:
                previous_state: Decoder state; the size of its axis 0 is the
                    current minibatch size, which may be smaller than the
                    enclosing ``mb_size``.
                prev_word_embedding: Optional embedding of the previous word,
                    added to the state projection when given.

            Returns:
                Tuple ``(ci, attn)``: the context of shape
                ``(current_mb_size, self.Hi)`` and the attention weights.
            """
            n_active = previous_state.data.shape[0]
            shrunk = n_active < mb_size

            # When the minibatch has shrunk, keep only the leading rows of
            # the precomputed tensors.
            if shrunk:
                keys, _ = F.split_axis(precomputed_al_factor, (n_active, ), 0)
                values, _ = F.split_axis(fb_concat, (n_active, ), 0)
            else:
                keys = precomputed_al_factor
                values = fb_concat
            if mask_length > 0:
                if shrunk:
                    penalties = concatenated_penalties[:n_active]
                else:
                    penalties = concatenated_penalties

            query = self.al_lin_s(previous_state)

            # As suggested by Isao Goto
            if prev_word_embedding is not None:
                query = query + self.al_lin_y(prev_word_embedding)

            # Tile the query over all source elements and score each of them.
            query = F.reshape(query, (n_active, 1, self.Ha))
            query = F.broadcast_to(query, (n_active, nb_elems, self.Ha))
            energies = F.reshape(F.tanh(query + keys),
                                 (n_active * nb_elems, self.Ha))
            a_coeffs = F.reshape(self.al_lin_o(energies),
                                 (n_active, nb_elems))

            if mask_length > 0:
                with cuda.get_device_from_array(penalties):
                    a_coeffs = a_coeffs + penalties

            attn = F.softmax(a_coeffs)

            ci = F.reshape(batch_matmul(attn, values, transa=True),
                           (n_active, self.Hi))

            return ci, attn