Example #1
def gaussian_kl_divergence(mean, ln_var):
    """Calculate KL-divergence between given gaussian and the standard one.

    Given two variable ``mean`` representing :math:`\\mu` and ``ln_var``
    representing :math:`\\log(\\sigma^2)`, this function returns a variable
    representing KL-divergence between given multi-dimensional gaussian
    :math:`N(\\mu, S)` and the standard Gaussian :math:`N(0, I)`

    .. math::

       D_{\\mathbf{KL}}(N(\\mu, S) \\| N(0, I)),

    where :math:`S` is a diagonal matrix such that :math:`S_{ii} = \\sigma_i^2`
    and :math:`I` is an identity matrix.

    Args:
        mean (~chainer.Variable): A variable representing mean of given
            gaussian distribution, :math:`\\mu`.
        ln_var (~chainer.Variable): A variable representing logarithm of
            variance of given gaussian distribution, :math:`\\log(\\sigma^2)`.

    Returns:
        ~chainer.Variable: A variable representing KL-divergence between
            given gaussian distribution and the standard gaussian.

    """
    assert isinstance(mean, variable.Variable)
    assert isinstance(ln_var, variable.Variable)

    J = mean.data.size
    var = F.exp(ln_var)
    return (F.sum(mean * mean) + F.sum(var) - F.sum(ln_var) - J) * 0.5
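
A minimal usage sketch for Example #1 (illustrative only; assumes chainer and numpy are installed, with F bound to chainer.functions and variable to chainer.variable, as the snippet expects). Standard-Gaussian parameters give a divergence of exactly zero:

import numpy as np
from chainer import functions as F
from chainer import variable

# mu = 0 and log(sigma^2) = 0 describe N(0, I), so the KL term vanishes
mean = variable.Variable(np.zeros((4,), dtype=np.float32))
ln_var = variable.Variable(np.zeros((4,), dtype=np.float32))
print(gaussian_kl_divergence(mean, ln_var).data)  # 0.0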
Example #2
File: models.py Project: kzky/works
    def __call__(self, x, y):
        """
        Parameters
        -----------------
        x: Variable
            Feature of unlabeled samples.
        y: Variable
            Feature of unlabeled samples.
        """
        
        g, x, y = F.broadcast(*[self.gamma, x, y])
        x_g = x * g
        y_g = y * g

        x_g_norm = F.sum(x_g**2, axis=1)  
        y_g_norm = F.sum(y_g**2, axis=1)
        x_g_y_g = F.linear(x_g, y_g)
        
        x_g_norm, x_g_y_g, y_g_norm = F.broadcast(
            x_g_norm, x_g_y_g, F.expand_dims(y_g_norm, 1))
        #F.exp(- (x_g_norm - 2 * x_g_y_g + y_g_norm))
        return F.exp(- x_g_norm + 2 * x_g_y_g - y_g_norm)
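
Expanded pairwise over rows, the value returned by Examples #2, #4 and #5 is a Gaussian (RBF) kernel matrix with a per-dimension scale g (notation here is illustrative): since ||u - v||^2 = ||u||^2 - 2 u.v + ||v||^2, and F.linear(x_g, y_g) supplies the pairwise dot products x_g y_g^T,

    K[i, j] = exp(-||g * x[i] - g * y[j]||^2)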
Example #3
	def encode_output(self, x_input, layer=1):
		if layer == 1:
			return F.sum(self.C_1(x_input), axis=1)
		elif layer == 2:
			return F.sum(self.C_2(x_input), axis=1)
		elif layer == 3:
			return F.sum(self.C_3(x_input), axis=1)
Example #4
File: models.py Project: kzky/works
    def __call__(self, x, y):
        """
        Parameters
        -----------------
        x: Variable
            Feature of unlabeled samples.
        y: Variable
            Feature of unlabeled samples.
        """

        g = F.broadcast_to(
            F.gaussian(
                np.array([0], dtype=np.float32),
                np.array([np.exp(1)], dtype=np.float32)), x.shape)
            
        x_g = x * g
        y_g = y * g

        x_g_norm = F.sum(x_g**2, axis=1)  
        y_g_norm = F.sum(y_g**2, axis=1)
        x_g_y_g = F.linear(x_g, y_g)
        
        x_g_norm, x_g_y_g, y_g_norm = F.broadcast(
            x_g_norm, x_g_y_g, F.expand_dims(y_g_norm, 1))
        #F.exp(- (x_g_norm - 2 * x_g_y_g + y_g_norm))
        return F.exp(- x_g_norm + 2 * x_g_y_g - y_g_norm)
Example #5
File: models.py Project: kzky/works
    def __call__(self, x, y):
        """
        Parameters
        -----------------
        x: Variable
            Feature of unlabeled samples.
        y: Variable
            Feature of unlabeled samples.
        """
        
        g, x, y = F.broadcast(*[self.gamma, x, y])
        x_g = x * g
        y_g = y * g

        x_g_norm = F.sum(x_g**2, axis=1)  
        y_g_norm = F.sum(y_g**2, axis=1)
        x_g_y_g = F.linear(x_g, y_g)
        
        x_g_norm, x_g_y_g, y_g_norm = F.broadcast(
            x_g_norm, x_g_y_g, F.expand_dims(y_g_norm, 1))
        #F.exp(- (x_g_norm - 2 * x_g_y_g + y_g_norm))
        u = x_g_norm - 2 * x_g_y_g + y_g_norm
        print(np.min(u.data))
        print(len(np.where(u.data < 0)[0]), np.prod(u.data.shape))
        time.sleep(0.5)
        return F.exp(- x_g_norm + 2 * x_g_y_g - y_g_norm)
Example #6
def bernoulli_nll(x, y):
    """Calculate negative log-likelihood of Bernoulli distribution.

    This function calculates negative log-likelihood on a Bernoulli
    distribution.

    .. math::

        -B(x; p) = -\\sum_i {x_i \\log(p_i) + (1 - x_i)\\log(1 - p_i)},

    where :math:`p = \\sigma(y)`, and :math:`\\sigma(\\cdot)` is a sigmoid
    function.

    .. note::

       As this function uses a sigmoid function, you can pass the output of a
       fully-connected layer (that means :class:`Linear`) to this function
       directly.

    Args:
        x (~chainer.Variable): Input variable.
        y (~chainer.Variable): A variable representing the parameter of
            Bernoulli distribution.

    Returns:
        ~chainer.Variable: A variable representing negative log-likelihood.

    """
    assert isinstance(x, variable.Variable)
    assert isinstance(y, variable.Variable)

    return F.sum(F.softplus(-y)) + F.sum(y) - F.sum(y * x)
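
A hedged sanity check for Example #6 (illustrative; same imports as the sketch under Example #1): at y = 0, i.e. p = sigma(0) = 0.5, the per-element NLL is log(2) regardless of x.

x = variable.Variable(np.array([0., 1., 1., 0.], dtype=np.float32))
y = variable.Variable(np.zeros((4,), dtype=np.float32))
print(bernoulli_nll(x, y).data)  # 4 * log(2) ~= 2.7726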
Example #7
File: models.py Project: kzky/works
    def __call__(self, x):
        """
        Parameters
        -----------------
        x: Variable
            Shape is 784 in case of MNIST
        """
        # Reset mid outputs
        mid_outputs = self.mid_outputs = []
        
        h = x
        for fc, bn in zip(self.fc_layers.values(), self.bn_layers.values()):
            z = fc(h)
            z_bn = bn(z, self.test)
            h = self.act(z_bn)

            shape = z.data.shape
            batch = shape[0]
            m, _ = F.broadcast(*[F.sum(z, 0) / batch, z])
            v, _ = F.broadcast(*[F.sum((z - m) ** 2, 0) / batch, z])
            
            #TODO: Add non-BN output
            mid_outputs.append((z - m) / v )

        return h
Example #8
File: models.py Project: kzky/works
 def __call__(self, d_gen, d=None):
     bs_gen = d_gen.shape[0]
     if d is not None:
         bs = d.shape[0]
         return F.sum(F.log(d)) / bs + F.sum(F.log(1 - d_gen)) / bs_gen
     else:
         return F.sum(F.log(1 - d_gen)) / bs_gen
Example #9
    def solve(self, x_seq, pos, neg, train=True, variablize=False, onebyone=True):
        if variablize:  # If arguments are just arrays (not variables), make them variables
            x_seq = [chainer.Variable(x, volatile=not train) for x in x_seq]
            x_seq = [F.dropout(x, ratio=self.dropout_ratio, train=train) for x in x_seq]
            pos = self.act1(self.W_candidate(
                F.dropout(chainer.Variable(pos, volatile=not train),
                          ratio=self.dropout_ratio, train=train)))
            neg = self.act1(self.W_candidate(
                F.dropout(chainer.Variable(neg, volatile=not train),
                          ratio=self.dropout_ratio, train=train)))
        if onebyone and train:
            target_x_seq = [self.act1(self.W_candidate(x)) for x in x_seq[:4]]  # 1,2,3,4,5-th targets
            onebyone_loss = 0.

        self.LSTM.reset_state()
        for i, x in enumerate(x_seq):
            h = self.LSTM( F.dropout(x, ratio=self.dropout_ratio, train=train) )
            if onebyone and train and target_x_seq[i+1:]:
                pos_score, neg_score = self.calculate_score(h, target_x_seq[i+1:], neg,
                                                            multipos=True)
                onebyone_loss += F.relu( self.margin - pos_score + neg_score )

        pos_score, neg_score = self.calculate_score(h, pos, neg)
        accum_loss = F.relu( self.margin - pos_score + neg_score )
        TorFs = sum(accum_loss.data < self.margin)
        
        if onebyone and train:
            return F.sum(accum_loss) + F.sum(onebyone_loss), TorFs
        else:
            return F.sum(accum_loss), TorFs
Example #10
 def loss_dis(self, dis, y_fake, y_real):
     batchsize = len(y_fake)
     L1 = F.sum(F.softplus(-y_real)) / batchsize
     L2 = F.sum(F.softplus(y_fake)) / batchsize
     loss = L1 + L2
     train_loss_dis.append(loss)
     return loss
Example #11
 def loss_dis(self, dis, y_in, y_out):
     batchsize,_,w,h = y_in.data.shape
     L1 = F.sum(F.softplus(-y_in)) / batchsize / w / h
     L2 = F.sum(F.softplus(y_out)) / batchsize / w / h
     loss = L1 + L2
     chainer.report({'loss': loss}, dis)
     return loss
Example #12
File: models.py Project: kzky/works
 def __call__(self, y, hiddens=None, scale=True):
     ne_loss = 0
     
     # NE for hiddens
     if hiddens is not None:
         for h in hiddens:
             h_normalized = F.softmax(h)
             h_log_softmax = F.log_softmax(h)
             n = h.data.shape[0]
             l = - F.sum(h_normalized * h_log_softmax) / n 
             if scale:
                 d = np.prod(h.data.shape[1:])
                 l = l / d
             ne_loss += l
             
     # NE for output
     y_normalized = F.softmax(y)
     y_log_softmax = F.log_softmax(y)
     n = y.data.shape[0]
     l = - F.sum(y_normalized * y_log_softmax) / n 
     if scale:
         d = np.prod(y.data.shape[1:])
         l = l / d
     ne_loss += l
     return ne_loss
Example #13
 def loss_dis(self, dis, y_fake, y_real):
     batchsize = len(y_fake)
     L1 = F.sum(F.softplus(-y_real)) / batchsize
     L2 = F.sum(F.softplus(y_fake)) / batchsize
     loss = L1 + L2
     chainer.report({'loss': loss}, dis)
     return loss
Example #14
def channel_normalize(x, test=False):
    s0, s1, s2, s3 = x.data.shape
    cavg = F.reshape(F.sum(x, axis=1) / s1, (s0, 1, s2, s3))
    xavg = F.concat(s1 * [cavg])
    cvar = F.reshape(F.sum((x - xavg) ** 2, axis=1) / s1, (s0, 1, s2, s3))
    xvar = F.concat(s1 * [cvar])
    return (x - xavg) / (xvar + 1e-5) ** 0.5
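
A quick, illustrative check of channel_normalize (assumes numpy and chainer with F = chainer.functions): after normalization, the mean over the channel axis is approximately zero at every spatial location.

import numpy as np
import chainer
from chainer import functions as F

x = chainer.Variable(np.random.randn(2, 3, 4, 4).astype(np.float32))
y = channel_normalize(x)
print(np.allclose(y.data.mean(axis=1), 0, atol=1e-4))  # True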
Example #15
    def test_backward_case1(self):
        vertices = [
            [-0.9, -0.9, 2.],
            [-0.8, 0.8, 1.],
            [0.8, 0.8, 0.5]]
        faces = [[0, 1, 2]]

        renderer = neural_renderer.Renderer()
        renderer.image_size = 64
        renderer.anti_aliasing = False
        renderer.perspective = False
        renderer.camera_mode = 'none'

        vertices = cp.array(vertices, 'float32')
        faces = cp.array(faces, 'int32')
        vertices, faces = utils.to_minibatch((vertices, faces))
        vertices = chainer.Variable(vertices)

        images = renderer.render_depth(vertices, faces)
        loss = cf.sum(cf.square(images[0, 15, 20] - 1))
        loss.backward()
        grad = vertices.grad.get()
        grad2 = np.zeros_like(grad)

        for i in range(3):
            for j in range(3):
                eps = 1e-3
                vertices2 = vertices.data.copy()
                vertices2[i, j] += eps
                images = renderer.render_depth(vertices2, faces)
                loss2 = cf.sum(cf.square(images[0, 15, 20] - 1))
                grad2[i, j] = ((loss2 - loss) / eps).data.get()

        chainer.testing.assert_allclose(grad, grad2, atol=1e-3)
Example #16
    def tv_norm(self, x):
        diffh = self.tvh(
            F.reshape(x, (3, 1, self.args.in_size, self.args.in_size)))
        diffw = self.tvw(
            F.reshape(x, (3, 1, self.args.in_size, self.args.in_size)))
        tv = (F.sum(diffh ** 2) + F.sum(diffw ** 2)) ** (self.args.beta / 2.)

        return tv
Example #17
 def loss_dis2(self, dis2, y_in, y_out):
     batchsize,_,w,h = y_in.data.shape
     L1 = F.sum(F.softplus(-y_in)) / batchsize / w / h
     L2 = F.sum(F.softplus(y_out)) / batchsize / w / h
     loss = L1 + L2
     #chainer.report({'loss': loss}, dis2)
     #print("dis2", {'loss': loss})
     return loss
Example #18
def cosine_similarity(x, y, eps=1e-6):
    n1, n2, n3 = x.data.shape
    _, m2, _ = y.data.shape
    z = F.batch_matmul(x, y, transb=True)
    x2 = F.broadcast_to(F.reshape(F.sum(x * x, axis=2), (n1, n2, 1)), (n1, n2, m2))
    y2 = F.broadcast_to(F.reshape(F.sum(y * y, axis=2), (n1, 1, m2)), (n1, n2, m2))
    z /= F.exp(F.log(x2 * y2 + eps) / 2)
    return z
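
A shape-contract sketch for cosine_similarity (illustrative): x of shape (n1, n2, n3) against y of shape (n1, m2, n3) yields an (n1, n2, m2) batch of cosine similarities, each entry in [-1, 1].

x = chainer.Variable(np.random.randn(2, 3, 5).astype(np.float32))
y = chainer.Variable(np.random.randn(2, 4, 5).astype(np.float32))
z = cosine_similarity(x, y)
print(z.shape)  # (2, 3, 4)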
Example #19
File: model.py Project: emakryo/mdn
 def __call__(self, x, y):
     h = F.sigmoid(self.l1_(x))
     coef = F.softmax(self.coef_(h))
     mean = F.reshape(self.mean_(h), (-1, self.NUM_MIXTURE, self.OUT_DIM))
     logvar = self.logvar_(h)
     mean, y = F.broadcast(mean, F.reshape(y, (-1, 1, self.OUT_DIM)))
     return F.sum(
         coef * F.exp(-0.5 * F.sum((y - mean)**2, axis=2) * F.exp(-logvar)) /
         ((2 * np.pi * F.exp(logvar))**(0.5 * self.OUT_DIM)), axis=1)
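
The value returned by Example #19 is the mixture density itself, not its negative log. In illustrative notation, with K = NUM_MIXTURE components, D = OUT_DIM, weights w = softmax(coef) and sigma_k^2 = exp(logvar_k):

    p(y | x) = sum_k w_k * exp(-||y - mu_k||^2 / (2 * sigma_k^2)) / (2 * pi * sigma_k^2)^(D/2)

Taking -log of this and summing over the batch gives the usual MDN training loss.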
Example #20
    def logli(self, a):
        a = F.cast(a, np.float32)
        # transform back to standard normal
        zs = (a - self.means) * F.exp(-self.log_stds)

        # density of standard normal: f(z) = (2*pi)^(-n/2) * exp(-|z|^2 / 2)
        # the return value is log f(z) - sum(log_stds) (change-of-variables term)
        return - F.sum(self.log_stds, axis=-1) - \
            0.5 * F.sum(F.square(zs), axis=-1) - \
            0.5 * self.means.shape[-1] * np.log(2 * np.pi)
Example #21
 def __accuracy(self, y, t):
     xp = self.xp
     b, c, n = y.data.shape
     v = np.arange(c, dtype=np.float32).reshape((1, -1, 1)).repeat(b, axis=0).repeat(n, axis=2)
     v = Variable(xp.asarray(v), volatile=True)
     r = F.sum(v * F.softmax(Variable(y.data, volatile=True)), axis=1)
     c = Variable(t.data >= 0, volatile=True)
     t = Variable(t.data.astype(np.float32), volatile=True)
     r = F.where(c, r, t)
     return F.sum(((r - t) * self.rating_unit) ** 2)
Example #22
    def norm_by_freq(self, freq):
        word_embs = self.W
        mean = F.sum(freq * word_embs, axis=0, keepdims=True)
        mean = F.broadcast_to(mean, word_embs.shape)
        var = F.sum(freq * ((word_embs - mean) ** 2), axis=0, keepdims=True)
        var = F.broadcast_to(var, word_embs.shape)

        stddev = F.sqrt(1e-6 + var)
        word_embs_norm = (word_embs - mean) / stddev
        return word_embs_norm
Example #23
File: losses.py Project: kzky/works
 def __call__(self, d_x_gen, d_x_real=None):
     bs_d_x_gen = d_x_gen.shape[0]
     if d_x_real is not None:
         bs_d_x_real = d_x_real.shape[0]
         loss = F.sum(d_x_real) / bs_d_x_real - F.sum(d_x_gen) / bs_d_x_gen
         return -loss  # to minimize
         
     else:
         loss = F.sum(d_x_gen) / bs_d_x_gen
         return - loss  # to minimize (reverse trick)
Example #24
 def __call__(self, x, z, test=False):
     if self.nolin:
         h = x
     else:
         h = self.lin(x)
     mu = F.sum(h, axis=0)/h.data.shape[0]
     self.mu = F.broadcast(F.reshape(mu, (1,h.data.shape[1])),h)[0]
     vr = (F.sum((h-self.mu)*(h-self.mu), axis=0)/h.data.shape[0])**0.5
     self.vr = F.broadcast(F.reshape(vr, (1,h.data.shape[1])),h)[0]
     bnh = (h-self.mu)/(self.vr+1e-7)
     return self.comb(bnh, z)
Example #25
 def forward(self, ids, bow):
     bow, ids = utils.move(self.xp, bow, ids)
     proportions = self.proportions(ids)
     ld = dirichlet_likelihood(proportions)
     doc = F.matmul(F.softmax(proportions), self.factors())
     logp = F.dropout(self.embedding(doc))
     # loss = -F.sum(bow * F.log_softmax(logp))
     sources, targets, counts = [], [], []
     lpi =  F.sum(bow * F.log_softmax(logp), axis=1)
     loss = -F.sum(lpi)
     return loss, ld
Example #26
File: losses.py Project: kzky/works
 def __call__(self, d_x_gen, d_x_real=None):
     bs_d_x_gen = d_x_gen.shape[0]
     if d_x_real is not None:
         bs_d_x_real = d_x_real.shape[0]
         loss = F.sum(F.square(d_x_real - 1)) / bs_d_x_real / 2 \
                + F.sum(F.square(d_x_gen)) / bs_d_x_gen / 2
         return loss
         
     else:
         loss = F.sum(F.square(d_x_gen - 1)) / bs_d_x_gen / 2
         return loss
Example #27
File: losses.py Project: kzky/works
 def __call__(self, d_x_gen, d_x=None):
     #TODO: reverse trick
     bs_d_x_gen = d_x_gen.shape[0]
     if d_x is not None:
         bs_d_x = d_x.shape[0]
         loss = F.sum(F.log(F.sigmoid(d_x))) / bs_d_x \
                + F.sum(F.log(1 - F.sigmoid(d_x_gen))) / bs_d_x_gen
         return - loss  # to minimize
         
     else:
         loss = F.sum(F.log(1 - F.sigmoid(d_x_gen))) / bs_d_x_gen
         return loss
Example #28
 def __call__(self, x, eta, test=False):
     h = self.lin(x)
     mu = F.sum(h, axis=0)/h.data.shape[0]
     self.mu = F.broadcast(F.reshape(mu, (1,h.data.shape[1])),h)[0]
     vr = (F.sum((h-self.mu)*(h-self.mu), axis=0)/h.data.shape[0])**0.5
     self.vr = F.broadcast(F.reshape(vr, (1,h.data.shape[1])),h)[0]
     bnh = (h-self.mu)/(self.vr+1e-7)
     z = bnh + xp.random.randn(x.data.shape[0], self.n_out)*eta
     if self.act is None:
         return z, F.broadcast(self.gamma.W, z)[0]*(z + F.broadcast(self.beta.W, z)[0])
     else:
         return z, self.act(F.broadcast(self.gamma.W, z)[0]*(z + F.broadcast(self.beta.W, z)[0]))
Example #29
File: net.py Project: hvy/chainer
    def __call__(self, x):
        q_z = self.encoder(x)
        z = q_z.sample(self.k)
        p_x = self.decoder(z)
        p_z = self.prior()

        reconstr = F.mean(F.sum(p_x.log_prob(
            F.broadcast_to(x[None, :], (self.k,) + x.shape)), axis=-1))
        kl_penalty = F.mean(F.sum(chainer.kl_divergence(q_z, p_z), axis=-1))
        loss = - (reconstr - self.beta * kl_penalty)
        reporter.report({'loss': loss}, self)
        reporter.report({'reconstr': reconstr}, self)
        reporter.report({'kl_penalty': kl_penalty}, self)
        return loss
Example #30
File: losses.py Project: kzky/works
    def __call__(self, y0, y1):
        bs = y0.data.shape[0]
        d = np.prod(y0.data.shape[1:])

        y0_softmax = F.softmax(y0)
        y1_softmax = F.softmax(y1)

        y0_log_softmax = F.log_softmax(y0)
        y1_log_softmax = F.log_softmax(y1)

        kl0 = F.sum(y0_softmax * (y0_log_softmax - y1_log_softmax)) / bs / d
        kl1 = F.sum(y1_softmax * (y1_log_softmax - y0_log_softmax)) / bs / d

        return (kl0 + kl1) / 2
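
Example #30 computes the symmetrized KL (Jeffreys-style) between the two softmax distributions, with each direction averaged over batch size bs and per-sample dimensionality d:

    L = (KL(p0 || p1) + KL(p1 || p0)) / 2,  where p_i = softmax(y_i)

so identical logits give exactly zero loss.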
Example #31
 def loss_gen(self, gen, y_fake, mse):
     batchsize = len(y_fake)
     loss = mse + self.alpha * F.sum(F.softplus(-y_fake)) / batchsize
     chainer.report({'loss': loss}, gen)
     return loss
Example #32
 def add(self, r_hat):
     self.sum += F.sum(r_hat).data
     self.n += r_hat.shape[0]
     return self.sum / self.n
Example #33
def clustering_loss(x, t, gamma, T=5):
    """Clustering loss function for metric learning.

    Args:
        x (~chainer.Variable):
            Feature vectors.
        t (~chainer.Variable):
            Class labels corresponding to x.
        gamma (~float):
            Hyperparameter gamma.
        T (int):
            Maximum number of iterations in Algorithm 2.

    Returns:
        ~chainer.Variable: Loss value.

    See: `Learnable Structured Clustering Framework for Deep Metric Learning \
         <https://arxiv.org/abs/1612.01213>`_

    """
    if not isinstance(x, chainer.Variable):
        x = chainer.Variable(x)
    if not isinstance(t, chainer.Variable):
        t = chainer.Variable(t)
    t_cpu = chainer.cuda.to_cpu(t.data).ravel()

    batch_size = len(t.data)
    num_classes = len(np.unique(t_cpu))

    v = list(range(batch_size))
    s = []

    # First, search the sub-optimal solution y_PAM of the clustering.
    # Note that this computation is done outside the computational graph.
    # Find an initial medoids of S_PAM by Algorithm 1 in the paper.
    D = distance_matrix(x.data)
    D = cuda.to_cpu(D)
    for _ in range(num_classes):
        # find an element in v which maximise a_function
        a_best = -np.inf
        for i in v:
            distances = D[s + [i]]
            g_s = distances.argmin(axis=0)
            f = -distances[g_s, range(batch_size)].sum()
            if f + gamma < a_best:  # skip if this is hopeless to be the best
                continue
            delta = 1.0 - normalized_mutual_info_score(t_cpu, g_s)
            a = f + gamma * delta
            if a > a_best:
                a_best = a
                i_best = i

        s.append(i_best)
        v.remove(i_best)

        # To speed things up, make the NMI-computation skip trigger more often:
        # sort v in descending order by distance to the nearest medoid
        D_min = D[s].min(0)  # distance to the nearest medoid for each point
        sorted_order = np.argsort(D_min[v])[::-1]
        v = np.array(v)[sorted_order].tolist()

    # Refine S_PAM by Algorithm 2
    a_previous = a_best
    for _ in range(T):  # avoid shadowing the label variable t
        np.random.shuffle(s)
        y_pam = np.array(s)[D[s].argmin(axis=0)]
        # since a column of D may have multiple zeros due to numerical errors,
        # ensure y_pam[j] == j, for each j \in s
        y_pam[s] = s
        for k in copy.copy(s):
            js = np.argwhere(y_pam == k).ravel()
            if len(js) == 1:
                continue
            D_k = D[:, js][js]
            fs = -D_k.sum(axis=1)
            j_max = fs.argmax()
            f_max = fs[j_max]
            s_except_k = copy.copy(s)
            s_except_k.remove(k)
            a_best = -np.inf
            for j, f in zip(js, fs):
                if f + gamma < f_max:
                    continue
                g_s_j = D[s_except_k + [j]].argmin(axis=0)
                delta = 1.0 - normalized_mutual_info_score(t_cpu, g_s_j)
                a = f + gamma * delta
                if a > a_best:
                    a_best = a
                    j_best = j
            s = s_except_k + [j_best]

        # stop if the score did not improve from the previous step
        distances = D[s]
        g_s = distances.argmin(axis=0)
        f = -distances[g_s, range(batch_size)].sum()
        delta = 1.0 - normalized_mutual_info_score(t_cpu, g_s)
        a = f + gamma * delta
        if a == a_previous:
            break
        a_previous = a
    s_pam = s

    # Here, compute the loss with S_PAM and its corresponding delta.
    y_pam = np.asarray(s_pam)[D[s_pam].argmin(axis=0)].tolist()

    y_star = np.empty_like(t_cpu)
    for c in np.unique(t_cpu):
        js = np.argwhere(t_cpu == c).ravel()  # indexes of examples of class c
        D_c = D[:, js][js]
        fs = D_c.sum(axis=1)
        y_star_c = js[fs.argmin()]
        y_star[js] = y_star_c

    f = -F.sum(F.batch_l2_norm_squared(x - x[y_pam]))
    f_tilde = -F.sum(F.batch_l2_norm_squared(x - x[y_star]))
    loss = F.relu(f + gamma * delta - f_tilde)
    return loss
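
The value returned at the end of Example #33 is the structured hinge from the cited paper (notation illustrative): loss = max(0, F(X; S_PAM) + gamma * Delta(y_PAM, y*) - F(X; S*)), where F is the negated sum of squared distances from each point to its assigned medoid and Delta = 1 - NMI is the structured margin.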
Example #34
 def loss_gen(self, gen, y_fake):
     batchsize = len(y_fake)
     #G(D(z), z)->1
     loss = F.sum(F.softplus(-y_fake)) / batchsize
     chainer.report({'loss': loss}, gen)
     return loss
Example #35
 def loss_gen(self, gen, y_fake):
     batchsize = y_fake.data.shape[0]
     loss = F.sum(F.softplus(-y_fake)) / batchsize
     chainer.report({'loss': loss}, gen)
     return loss
Example #36
 def compute_kld(self, p, q):
     assert p.shape[0] == q.shape[0]
     return functions.reshape(
         functions.sum(
             p * (functions.log(p + 1e-16) - functions.log(q + 1e-16)),
             axis=1), (-1, 1))
Example #37
File: models.py Project: kzky/works
 def __call__(self, mu, sigma_2, log_sigma_2):
     bs = mu.shape[0]
     kl = F.sum(1 + log_sigma_2 - mu**2 -
                sigma_2) / 2 / bs  # Explicit KL form
     kl = -kl  # maximize kl means to minimize -kl
     return kl
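
Example #37 is the closed form of the diagonal-Gaussian KL from Example #1, written out explicitly (illustrative notation):

    KL(N(mu, sigma^2) || N(0, I)) = -1/2 * sum_i (1 + log(sigma_i^2) - mu_i^2 - sigma_i^2)

which matches the returned -kl up to the 1/bs batch average.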
Example #38
def main(id):
    model_path = "/efs/fMRI_AE/SimpleFCAE_E32D32/model/model_iter_108858"

    gpu = 0
    get_device_from_id(gpu).use()
    """NibDataset
    def __init__(self, directory: str, crop: list):
    """
    crop = [[9, 81], [11, 99], [0, 80]]
    test_dataset = NibDataset("/data/test", crop=crop)

    mask = load_mask_nib("/data/mask/average_optthr.nii", crop)
    """SimpleFCAE_E32D32
    def __init__(self, mask, r: int, in_mask: str, out_mask: str):
    """
    model = Model(mask, 2, "mask", "mask")
    load_npz(model_path, model)
    model.to_gpu()

    # feature_idx = 0
    # feature_idx = (0, 4, 5, 5) # == [0, 9/2, 11/2, 10/2]
    # feature_idx = (0, 1, 1, 1)
    feature_idx = (0, 2, 7, 4)
    resample_size = 100
    batch_size = 10
    noise_level = 0.2

    for i in range(len(test_dataset)):
        if i % 8 != id:
            continue
        print("{:4}/{:4}".format(i, len(test_dataset)))
        subject = test_dataset.get_subject(i)
        frame = test_dataset.get_frame(i)
        test_img = xp.asarray(test_dataset[i])

        resample_remain = resample_size
        resample_processed = 0
        ret = xp.zeros(test_img.shape)
        while resample_remain > 0:
            batch_size_this_loop = min(batch_size, resample_remain)
            resample_remain -= batch_size_this_loop

            batch = xp.broadcast_to(
                test_img, chain((batch_size_this_loop, ), test_img.shape))
            sigma = noise_level / (xp.max(test_img) - xp.min(test_img))
            batch += sigma * xp.random.randn(*batch.shape)

            x = Variable(batch)

            feature = model.extract(x)
            assert feature.shape == (batch_size, 1, 9, 11, 10)
            feature = F.sum(feature, axis=0)
            assert feature.shape == (1, 9, 11, 10)
            feature = F.get_item(feature, feature_idx)
            feature.backward()
            grad = xp.mean(x.grad, axis=0)
            ret = (ret * resample_processed + grad * batch_size_this_loop) / (
                resample_processed + batch_size_this_loop)
            model.cleargrads()

        xp.save(
            "/efs/fMRI_AE/SimpleFCAE_E32D32/grad/sensitivity_map_feature_{}_{}_{}_subject{:03d}_frame{:03d}"
            .format(feature_idx[1], feature_idx[2], feature_idx[3], subject,
                    frame), ret)
Example #39
    def __call__(self, enc_hs, dec_z, att_prev, scaling=2.0):
        """Compute AttLoc forward layer.

        Args:
            enc_hs (chainer.Variable | N-dimensional array):
                Input variable from encoders.
            dec_z (chainer.Variable | N-dimensional array): Input variable of decoder.
            att_prev (chainer.Variable | None): Attention weight.
            scaling (float): Scaling weight to make attention sharp.

        Returns:
            chainer.Variable: Weighted sum over frames.
            chainer.Variable: Attention weight.

        """
        batch = len(enc_hs)
        # pre-compute all h outside the decoder loop
        if self.pre_compute_enc_h is None:
            self.enc_h = F.pad_sequence(enc_hs)  # utt x frame x hdim
            self.h_length = self.enc_h.shape[1]
            # utt x frame x att_dim
            self.pre_compute_enc_h = self.mlp_enc(self.enc_h, n_batch_axes=2)

        if dec_z is None:
            dec_z = chainer.Variable(
                self.xp.zeros((batch, self.dunits), dtype=np.float32)
            )
        else:
            dec_z = dec_z.reshape(batch, self.dunits)

        # initialize attention weight with uniform dist.
        if att_prev is None:
            att_prev = [
                self.xp.full(hh.shape[0], 1.0 / hh.shape[0], dtype=np.float32)
                for hh in enc_hs
            ]
            att_prev = [chainer.Variable(att) for att in att_prev]
            att_prev = F.pad_sequence(att_prev)

        # att_prev: utt x frame -> utt x 1 x 1 x frame
        # -> utt x att_conv_chans x 1 x frame
        att_conv = self.loc_conv(att_prev.reshape(batch, 1, 1, self.h_length))
        # att_conv: utt x att_conv_chans x 1 x frame -> utt x frame x att_conv_chans
        att_conv = F.swapaxes(F.squeeze(att_conv, axis=2), 1, 2)
        # att_conv: utt x frame x att_conv_chans -> utt x frame x att_dim
        att_conv = self.mlp_att(att_conv, n_batch_axes=2)

        # dec_z_tiled: utt x frame x att_dim
        dec_z_tiled = F.broadcast_to(
            F.expand_dims(self.mlp_dec(dec_z), 1), self.pre_compute_enc_h.shape
        )

        # dot with gvec
        # utt x frame x att_dim -> utt x frame
        # TODO(watanabe) use batch_matmul
        e = F.squeeze(
            self.gvec(
                F.tanh(att_conv + self.pre_compute_enc_h + dec_z_tiled), n_batch_axes=2
            ),
            axis=2,
        )
        # Applying a minus-large-number filter
        # to make a probability value zero for a padded area
        # simply degrades the performance, and I gave up this implementation
        # Apply a scaling to make an attention sharp
        w = F.softmax(scaling * e)

        # weighted sum over frames
        # utt x hdim
        c = F.sum(
            self.enc_h * F.broadcast_to(F.expand_dims(w, 2), self.enc_h.shape), axis=1
        )

        return c, w
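
In illustrative notation, Example #39 implements location-aware attention: with encoder states h_t, decoder state d, and convolved previous attention features f_t,

    e_t = g^T tanh(W_enc h_t + W_dec d + W_att f_t),  w = softmax(scaling * e),  c = sum_t w_t h_t

where g corresponds to gvec, W_enc to mlp_enc, W_dec to mlp_dec, and W_att to mlp_att applied to the loc_conv output.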
Example #40
 def forward(self, xs):
     h = self.l(xs)
     h = self.half(h)
     return F.sum(chainer.as_variable(h))
Example #41
def loss_func_dcgan_dis_fake(h):
    return F.sum(F.softplus(h)) / np.prod(h.data.shape)
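
The identity behind Example #41 and the other softplus-based discriminator losses above: softplus(h) = log(1 + e^h) = -log(1 - sigmoid(h)), so F.sum(F.softplus(h)) is the summed cross-entropy for labeling generated samples as fake, here averaged over every element of h.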
Example #42
def adv_loss(y, alpha=1.0):
    a, p, n = F.split_axis(y, 3, axis=0)
    distance = -F.sum((a - p)**2.0, axis=1) + F.sum(
        (a - n)**2.0, axis=1) - alpha

    return F.average(F.relu(distance)) / 2
Example #43
 def __call__(self, obs):
     action_distrib = self.pi(obs)
     action_value = self.q(obs)
     v = F.sum(action_distrib.all_prob * action_value.q_values, axis=1)
     return action_distrib, action_value, v
Example #44
 def mse_gen(self, x, m, c):
     return F.sum(
         F.batch_l2_norm_squared(F.broadcast_to(m, x.shape) *
                                 (c - x))) / len(x)
Example #45
    def __call__(self, input_x, t, ignore_t):
        if isinstance(input_x, chainer.Variable):
            device = cuda.get_device(input_x.data)
        else:
            device = cuda.get_device(input_x)
        xp = self.predictor.xp
        with device:
            output = self.predictor(input_x)
            batch_size, _, grid_h, grid_w = output.shape
            self.seen += batch_size
            x, y, w, h, conf, prob = F.split_axis(F.reshape(
                output, (batch_size, self.predictor.n_boxes,
                         self.predictor.n_classes + 5, grid_h, grid_w)),
                                                  (1, 2, 3, 4, 5),
                                                  axis=2)
            x = F.sigmoid(x)
            y = F.sigmoid(y)
            conf = F.sigmoid(conf)
            prob = F.transpose(prob, (0, 2, 1, 3, 4))
            prob = F.softmax(prob)

            # training labels
            tw = np.zeros(w.shape, dtype=np.float32)
            th = np.zeros(h.shape, dtype=np.float32)
            tx = np.tile(0.5, x.shape).astype(np.float32)
            ty = np.tile(0.5, y.shape).astype(np.float32)

            # set low learning rate for bounding boxes that have no object
            if self.seen < self.unstable_seen:
                box_learning_scale = np.tile(0.1, x.shape).astype(np.float32)
            else:
                box_learning_scale = np.tile(0, x.shape).astype(np.float32)

            tconf = np.zeros(conf.shape, dtype=np.float32)
            conf_learning_scale = np.tile(0.1, conf.shape).astype(np.float32)

            tprob = prob.data.copy()

            x_shift = np.broadcast_to(np.arange(grid_w, dtype=np.float32),
                                      x.shape[1:])
            y_shift = np.broadcast_to(
                np.arange(grid_h, dtype=np.float32).reshape(grid_h, 1),
                y.shape[1:])
            w_anchor = np.broadcast_to(
                np.reshape(
                    np.array(self.anchors, dtype=np.float32)[:, 0],
                    (self.predictor.n_boxes, 1, 1, 1)), w.shape[1:])
            h_anchor = np.broadcast_to(
                np.reshape(
                    np.array(self.anchors, dtype=np.float32)[:, 1],
                    (self.predictor.n_boxes, 1, 1, 1)), h.shape[1:])
            x_data = cuda.to_cpu(x.data)
            y_data = cuda.to_cpu(y.data)
            w_data = cuda.to_cpu(w.data)
            h_data = cuda.to_cpu(h.data)
            best_ious = []
            for batch in range(batch_size):
                n_truth_boxes = len(t[batch])
                box_x = (x_data[batch] + x_shift) / grid_w
                box_y = (y_data[batch] + y_shift) / grid_h
                box_w = np.exp(w_data[batch]) * w_anchor / grid_w
                box_h = np.exp(h_data[batch]) * h_anchor / grid_h

                ious = []
                for truth_index in range(n_truth_boxes):
                    truth_box_x = np.broadcast_to(
                        np.array(t[batch][truth_index]["x"], dtype=np.float32),
                        box_x.shape)
                    truth_box_y = np.broadcast_to(
                        np.array(t[batch][truth_index]["y"], dtype=np.float32),
                        box_y.shape)
                    truth_box_w = np.broadcast_to(
                        np.array(t[batch][truth_index]["w"], dtype=np.float32),
                        box_w.shape)
                    truth_box_h = np.broadcast_to(
                        np.array(t[batch][truth_index]["h"], dtype=np.float32),
                        box_h.shape)
                    ious.append(
                        multi_box_iou(
                            Box(box_x, box_y, box_w, box_h),
                            Box(truth_box_x, truth_box_y, truth_box_w,
                                truth_box_h)))
                if len(ious) > 0:
                    ious = np.asarray(ious)
                    best_ious.append(np.max(ious, axis=0))
                else:
                    best_ious.append(np.zeros_like(x_data[0]))
            best_ious = np.array(best_ious)

            # keep confidence of anchor that has more confidence than threshold
            tconf[best_ious > self.thresh] = conf.data.get()[
                best_ious > self.thresh]
            conf_learning_scale[best_ious > self.thresh] = 0

            # ignored regions are not considered either positive or negative

            best_ious = []
            for batch in range(batch_size):
                n_truth_boxes = len(ignore_t[batch])
                box_x = (x_data[batch] + x_shift) / grid_w
                box_y = (y_data[batch] + y_shift) / grid_h
                box_w = np.exp(w_data[batch]) * w_anchor / grid_w
                box_h = np.exp(h_data[batch]) * h_anchor / grid_h

                ious = []
                for truth_index in range(n_truth_boxes):
                    truth_box_x = np.broadcast_to(
                        np.array(ignore_t[batch][truth_index]["x"],
                                 dtype=np.float32), box_x.shape)
                    truth_box_y = np.broadcast_to(
                        np.array(ignore_t[batch][truth_index]["y"],
                                 dtype=np.float32), box_y.shape)
                    truth_box_w = np.broadcast_to(
                        np.array(ignore_t[batch][truth_index]["w"],
                                 dtype=np.float32), box_w.shape)
                    truth_box_h = np.broadcast_to(
                        np.array(ignore_t[batch][truth_index]["h"],
                                 dtype=np.float32), box_h.shape)
                    ious.append(
                        multi_box_iou(
                            Box(box_x, box_y, box_w, box_h),
                            Box(truth_box_x, truth_box_y, truth_box_w,
                                truth_box_h)))
                if len(ious) > 0:
                    ious = np.asarray(ious)
                    best_ious.append(np.max(ious, axis=0))
                else:
                    best_ious.append(np.zeros_like(x_data[0]))
            best_ious = np.array(best_ious)

            # do not update confidence for ignored regions
            tconf[best_ious > self.ignore_thresh] = conf.data.get()[
                best_ious > self.ignore_thresh]
            conf_learning_scale[best_ious > self.ignore_thresh] = 0

            # adjust x, y, w, h, conf, prob of anchor boxes that have objects
            abs_anchors = self.anchors / np.array([grid_w, grid_h])
            for batch in range(batch_size):
                for truth_box in t[batch]:
                    truth_w = int(float(truth_box["x"]) * grid_w)
                    truth_h = int(float(truth_box["y"]) * grid_h)
                    truth_n = 0
                    best_iou = 0.0
                    for anchor_index, abs_anchor in enumerate(abs_anchors):
                        iou = box_iou(
                            Box(0, 0, float(truth_box["w"]),
                                float(truth_box["h"])),
                            Box(0, 0, abs_anchor[0], abs_anchor[1]))
                        if best_iou < iou:
                            best_iou = iou
                            truth_n = anchor_index

                    box_learning_scale[batch, truth_n, :, truth_h,
                                       truth_w] = 1.0
                    tx[batch, truth_n, :, truth_h,
                       truth_w] = float(truth_box["x"]) * grid_w - truth_w
                    ty[batch, truth_n, :, truth_h,
                       truth_w] = float(truth_box["y"]) * grid_h - truth_h
                    tw[batch, truth_n, :, truth_h, truth_w] = np.log(
                        float(truth_box["w"]) / abs_anchors[truth_n][0])
                    th[batch, truth_n, :, truth_h, truth_w] = np.log(
                        float(truth_box["h"]) / abs_anchors[truth_n][1])
                    tprob[batch, :, truth_n, truth_h, truth_w] = 0
                    tprob[batch,
                          int(truth_box["label"]), truth_n, truth_h,
                          truth_w] = 1

                    full_truth_box = Box(float(truth_box["x"]),
                                         float(truth_box["y"]),
                                         float(truth_box["w"]),
                                         float(truth_box["h"]))
                    predicted_box = Box(
                        (x[batch][truth_n][0][truth_h][truth_w].data.get() +
                         truth_w) / grid_w,
                        (y[batch][truth_n][0][truth_h][truth_w].data.get() +
                         truth_h) / grid_h,
                        np.exp(
                            w[batch][truth_n][0][truth_h][truth_w].data.get())
                        * abs_anchors[truth_n][0],
                        np.exp(
                            h[batch][truth_n][0][truth_h][truth_w].data.get())
                        * abs_anchors[truth_n][1])
                    predicted_iou = box_iou(full_truth_box, predicted_box)
                    tconf[batch, truth_n, :, truth_h, truth_w] = predicted_iou
                    conf_learning_scale[batch, truth_n, :, truth_h,
                                        truth_w] = 10.0

            tx = cuda.to_gpu(tx)
            ty = cuda.to_gpu(ty)
            tw = cuda.to_gpu(tw)
            th = cuda.to_gpu(th)
            tconf = cuda.to_gpu(tconf)
            tprob = cuda.to_gpu(tprob)

            box_learning_scale = cuda.to_gpu(box_learning_scale)
            conf_learning_scale = cuda.to_gpu(conf_learning_scale)

            x_loss = F.sum((tx - x)**2 * box_learning_scale) / 2
            y_loss = F.sum((ty - y)**2 * box_learning_scale) / 2
            w_loss = F.sum((tw - w)**2 * box_learning_scale) / 2
            h_loss = F.sum((th - h)**2 * box_learning_scale) / 2
            c_loss = F.sum((tconf - conf)**2 * conf_learning_scale) / 2
            p_loss = F.sum((tprob - prob)**2) / 2
            return x_loss, y_loss, w_loss, h_loss, c_loss, p_loss
Example #46
def _readout_sum(x):
    y = functions.sum(x, axis=1)  # sum along node axis
    return y
Example #47
    def handle_gpu_batch(self, epoch, batches_passed, batch_start_time, k, g,
                         att_images, att_images_mix,
                         att_images_multi, att_images_multi_mix,
                         att_images_real_multi,
                         joints, batch_one_hot,
                         objects, objs_one_hot, sentence, descriptions,
                         descriptions_one_hot):

        xp = cuda.cupy
        cuda.get_device(g).use()
        self.enc_models[k].cleargrads()
        self.att_enc_models[k].cleargrads()
        self.att_gen_models[k].cleargrads()
        self.dis_models[k].cleargrads()
        self.mdn_models[k].cleargrads()
        self.reset_all([self.mdn_models[k]])

        gpu_batch_size = self.batch_size // GPU.num_gpus

        att_images = att_images[k * gpu_batch_size:(k + 1) * gpu_batch_size]
        att_images_mix = att_images_mix[k * gpu_batch_size:(k + 1) *
                                        gpu_batch_size]
        att_images_multi = att_images_multi[k * gpu_batch_size:(k + 1) *
                                            gpu_batch_size]
        att_images_multi_mix = att_images_multi_mix[k *
                                                    gpu_batch_size:(k + 1) *
                                                    gpu_batch_size]
        att_images_real_multi = att_images_real_multi[k *
                                                      gpu_batch_size:(k + 1) *
                                                      gpu_batch_size]

        objects = np.asarray(objects[k * gpu_batch_size:(k + 1) *
                                     gpu_batch_size],
                             dtype=np.int32)
        objects = np.repeat(objects[:, np.newaxis], self.sequence_size, axis=1)

        objs_one_hot = np.asarray(objs_one_hot[k * gpu_batch_size:(k + 1) *
                                               gpu_batch_size],
                                  dtype=np.float32)
        objs_one_hot = np.repeat(objs_one_hot[:, np.newaxis],
                                 self.sequence_size,
                                 axis=1)

        descriptions = np.asarray(descriptions[k * gpu_batch_size:(k + 1) *
                                               gpu_batch_size],
                                  dtype=np.int32)
        descriptions = np.repeat(descriptions[:, np.newaxis],
                                 self.sequence_size,
                                 axis=1)

        descriptions_one_hot = np.asarray(
            descriptions_one_hot[k * gpu_batch_size:(k + 1) * gpu_batch_size],
            dtype=np.float32)
        descriptions_one_hot = np.repeat(descriptions_one_hot[:, np.newaxis],
                                         self.sequence_size,
                                         axis=1)

        att_images = att_images.transpose(1, 0, 2, 3, 4)
        att_images_mix = att_images_mix.transpose(1, 0, 2, 3, 4)
        att_images_multi = att_images_multi.transpose(1, 0, 2, 3, 4)
        att_images_multi_mix = att_images_multi_mix.transpose(1, 0, 2, 3, 4)
        att_images_real_multi = att_images_real_multi.transpose(1, 0, 2, 3, 4)

        objects = objects.transpose(1, 0)
        objs_one_hot = objs_one_hot.transpose(1, 0, 2)
        descriptions = descriptions.transpose(1, 0)
        descriptions_one_hot = descriptions_one_hot.transpose(1, 0, 2)

        objects = np.squeeze(
            np.reshape(objects, (self.sequence_size * gpu_batch_size, -1)))
        objs_one_hot = np.squeeze(
            np.reshape(objs_one_hot,
                       (self.sequence_size * gpu_batch_size, -1)))
        descriptions = np.squeeze(
            np.reshape(descriptions,
                       (self.sequence_size * gpu_batch_size, -1)))
        descriptions_one_hot = np.squeeze(
            np.reshape(descriptions_one_hot,
                       (self.sequence_size * gpu_batch_size, -1)))

        joints = joints.transpose(1, 0, 2)
        joints = np.asarray(joints[:, k * gpu_batch_size:(k + 1) *
                                   gpu_batch_size],
                            dtype=np.float32)
        joints = Variable(cuda.to_gpu(joints, g))

        batch_one_hot = np.asarray(batch_one_hot[k * gpu_batch_size:(k + 1) *
                                                 gpu_batch_size],
                                   dtype=np.float32)
        batch_one_hot = np.repeat(batch_one_hot[np.newaxis],
                                  self.sequence_size,
                                  axis=0)
        batch_one_hot = np.reshape(batch_one_hot,
                                   (self.sequence_size * gpu_batch_size, 4))
        batch_one_hot = Variable(cuda.to_gpu(batch_one_hot, g))

        att_images = np.reshape(
            att_images,
            (-1, self.num_channels, self.image_size, self.image_size))
        x_in_att = Variable(
            cuda.to_gpu(np.asarray(att_images, dtype=np.float32), g))
        att_images_mix = np.reshape(
            att_images_mix,
            (-1, self.num_channels, self.image_size, self.image_size))
        x_in_att_mix = Variable(
            cuda.to_gpu(np.asarray(att_images_mix, dtype=np.float32), g))
        att_images_multi = np.reshape(
            att_images_multi,
            (-1, self.num_channels, self.image_size, self.image_size))
        x_in_att_multi = Variable(
            cuda.to_gpu(np.asarray(att_images_multi, dtype=np.float32), g))
        att_images_multi_mix = np.reshape(
            att_images_multi_mix,
            (-1, self.num_channels, self.image_size, self.image_size))
        x_in_att_multi_mix = Variable(
            cuda.to_gpu(np.asarray(att_images_multi_mix, dtype=np.float32), g))
        att_images_real_multi = np.reshape(
            att_images_real_multi,
            (-1, self.num_channels, self.image_size, self.image_size))
        x_in_att_real_multi = Variable(
            cuda.to_gpu(np.asarray(att_images_real_multi, dtype=np.float32),
                        g))

        objects_var = Variable(cuda.to_gpu(objects, g))
        desc_var = Variable(cuda.to_gpu(descriptions, g))
        objects_hot_var = Variable(cuda.to_gpu(objs_one_hot, g))
        desc_hot_var = Variable(cuda.to_gpu(descriptions_one_hot, g))

        att0, s0, c0 = self.enc_models[k](x_in_att,
                                          objects_hot_var,
                                          desc_hot_var,
                                          train=True)
        m_att0, m_s0, m_c0 = self.enc_models[k](x_in_att_mix,
                                                objects_hot_var,
                                                desc_hot_var,
                                                train=True)
        att00, s00, c00 = self.enc_models[k](x_in_att_multi,
                                             objects_hot_var,
                                             desc_hot_var,
                                             train=True)
        m_att00, m_s00, m_c00 = self.enc_models[k](x_in_att_multi_mix,
                                                   objects_hot_var,
                                                   desc_hot_var,
                                                   train=True)
        real_att0, real_s0, real_c0 = self.enc_models[k](x_in_att_real_multi,
                                                         objects_hot_var,
                                                         desc_hot_var,
                                                         train=True)

        l1_norm_att = F.sum(att0)
        l1_norm_att += F.sum(m_att0)
        l1_norm_att += F.sum(att00)
        l1_norm_att += F.sum(m_att00)
        l1_norm_att += F.sum(real_att0)
        l1_norm_att /= 5 * gpu_batch_size * self.sequence_size * self.att_size * self.att_size

        # att0 = F.normalize(att0, axis=1)
        att0 = F.reshape(att0, (-1, 1, self.att_size, self.att_size))
        att0 = F.resize_images(att0, (self.image_size, self.image_size))
        # m_att0 = F.normalize(m_att0, axis=1)
        m_att0 = F.reshape(m_att0, (-1, 1, self.att_size, self.att_size))
        m_att0 = F.resize_images(m_att0, (self.image_size, self.image_size))
        # att00 = F.normalize(att00, axis=1)
        att00 = F.reshape(att00, (-1, 1, self.att_size, self.att_size))
        att00 = F.resize_images(att00, (self.image_size, self.image_size))
        # m_att00 = F.normalize(m_att00, axis=1)
        m_att00 = F.reshape(m_att00, (-1, 1, self.att_size, self.att_size))
        m_att00 = F.resize_images(m_att00, (self.image_size, self.image_size))
        # real_att0 = F.normalize(real_att0, axis=1)
        real_att0 = F.reshape(real_att0, (-1, 1, self.att_size, self.att_size))
        real_att0 = F.resize_images(real_att0,
                                    (self.image_size, self.image_size))

        att_classification = F.softmax_cross_entropy(
            s0, objects_var) + F.softmax_cross_entropy(c0, desc_var)
        att_classification += F.softmax_cross_entropy(
            m_s0, objects_var) + F.softmax_cross_entropy(m_c0, desc_var)
        att_classification += F.softmax_cross_entropy(
            s00, objects_var) + F.softmax_cross_entropy(c00, desc_var)
        att_classification += F.softmax_cross_entropy(
            m_s00, objects_var) + F.softmax_cross_entropy(m_c00, desc_var)
        att_classification += F.softmax_cross_entropy(
            real_s0, objects_var) + F.softmax_cross_entropy(real_c0, desc_var)
        att_classification /= 10

        g1 = x_in_att * att0
        g2 = x_in_att_mix * m_att0
        g3 = x_in_att_multi * att00
        g4 = x_in_att_multi_mix * m_att00
        g5 = x_in_att_real_multi * real_att0

        att_similarity = F.mean_squared_error(g1, g2)
        att_similarity += F.mean_squared_error(g3, g4)

        cir_z, cir_mean, cir_var, _ = self.att_enc_models[k](g1, train=True)
        cir_z_m, cir_mean_m, cir_var_m, _ = self.att_enc_models[k](g2,
                                                                   train=True)
        cir_z0, cir_mean0, cir_var0, _ = self.att_enc_models[k](g3, train=True)
        cir_z0_m, cir_mean0_m, cir_var0_m, _ = self.att_enc_models[k](
            g4, train=True)
        cir_z_real, cir_mean_real, cir_var_real, _ = self.att_enc_models[k](
            g5, train=True)

        l_prior = F.gaussian_kl_divergence(cir_mean,
                                           cir_var) / (5 * self.normer)
        l_prior += F.gaussian_kl_divergence(cir_mean_m,
                                            cir_var_m) / (5 * self.normer)
        l_prior += F.gaussian_kl_divergence(cir_mean0,
                                            cir_var0) / (5 * self.normer)
        l_prior += F.gaussian_kl_divergence(cir_mean0_m,
                                            cir_var0_m) / (5 * self.normer)
        l_prior += F.gaussian_kl_divergence(cir_mean_real,
                                            cir_var_real) / (5 * self.normer)
        l_prior /= 5

        cir_x0 = self.att_gen_models[k](cir_z, train=True)
        cir_m_x0 = self.att_gen_models[k](cir_z_m, train=True)
        cir_x00 = self.att_gen_models[k](cir_z0, train=True)
        cir_m_x00 = self.att_gen_models[k](cir_z0_m, train=True)
        cir_real_x0 = self.att_gen_models[k](cir_z_real, train=True)

        reconstruction_loss = F.mean_squared_error(
            x_in_att, cir_x0[:, :3]) + F.mean_squared_error(
                x_in_att, cir_m_x0[:, :3])
        reconstruction_loss += F.mean_squared_error(
            x_in_att_multi, cir_x00[:, :3]) + F.mean_squared_error(
                x_in_att_multi, cir_m_x00[:, :3])

        reconstruction_loss_att = F.mean_squared_error(
            g1, cir_x0[:, 3:]) + F.mean_squared_error(g2, cir_m_x0[:, 3:])
        reconstruction_loss_att += F.mean_squared_error(
            g3, cir_x00[:, 3:]) + F.mean_squared_error(g4, cir_m_x00[:, 3:])

        reconstruction_loss /= 4
        reconstruction_loss_att /= 4
        reconstruction_loss_att *= 100

        s3, c3, l3 = self.dis_models[k](cir_x0[:, :3], train=True)
        m_s3, m_c3, m_l3 = self.dis_models[k](cir_m_x0[:, :3], train=True)
        s30, c30, l30 = self.dis_models[k](cir_x00[:, :3], train=True)
        m_s30, m_c30, m_l30 = self.dis_models[k](cir_m_x00[:, :3], train=True)
        m_s30_real, m_c30_real, m_l30_real = self.dis_models[k](
            cir_real_x0[:, :3], train=True)

        l_dis_rec_3 = F.softmax_cross_entropy(
            s3,
            Variable(
                cuda.to_gpu(
                    xp.zeros(gpu_batch_size * self.sequence_size).astype(
                        np.int32), g)))
        m_l_dis_rec_3 = F.softmax_cross_entropy(
            m_s3,
            Variable(
                cuda.to_gpu(
                    xp.zeros(gpu_batch_size * self.sequence_size).astype(
                        np.int32), g)))
        l_dis_rec3 = F.softmax_cross_entropy(
            s30,
            Variable(
                cuda.to_gpu(
                    xp.zeros(gpu_batch_size * self.sequence_size).astype(
                        np.int32), g)))
        m_l_dis_rec3 = F.softmax_cross_entropy(
            m_s30,
            Variable(
                cuda.to_gpu(
                    xp.zeros(gpu_batch_size * self.sequence_size).astype(
                        np.int32), g)))
        real_l_dis_rec3 = F.softmax_cross_entropy(
            m_s30_real,
            Variable(
                cuda.to_gpu(
                    xp.zeros(gpu_batch_size * self.sequence_size).astype(
                        np.int32), g)))

        l_dis_rec_3 += F.softmax_cross_entropy(
            c3,
            Variable(
                cuda.to_gpu(
                    xp.zeros(gpu_batch_size * self.sequence_size).astype(
                        np.int32), g)))
        m_l_dis_rec_3 += F.softmax_cross_entropy(
            m_c3,
            Variable(
                cuda.to_gpu(
                    xp.zeros(gpu_batch_size * self.sequence_size).astype(
                        np.int32), g)))
        l_dis_rec3 += F.softmax_cross_entropy(
            c30,
            Variable(
                cuda.to_gpu(
                    xp.zeros(gpu_batch_size * self.sequence_size).astype(
                        np.int32), g)))
        m_l_dis_rec3 += F.softmax_cross_entropy(
            m_c30,
            Variable(
                cuda.to_gpu(
                    xp.zeros(gpu_batch_size * self.sequence_size).astype(
                        np.int32), g)))
        real_l_dis_rec3 += F.softmax_cross_entropy(
            m_c30_real,
            Variable(
                cuda.to_gpu(
                    xp.zeros(gpu_batch_size * self.sequence_size).astype(
                        np.int32), g)))

        l_dis_fake = (l_dis_rec_3 + m_l_dis_rec_3 + l_dis_rec3 + m_l_dis_rec3 +
                      real_l_dis_rec3) / 10

        s2, c2, l2 = self.dis_models[k](x_in_att, train=True)
        s22, c22, l22 = self.dis_models[k](x_in_att_multi, train=True)

        l_dis_real = F.softmax_cross_entropy(s2, objects_var)
        l_dis_real += F.softmax_cross_entropy(s22, objects_var)
        l_dis_real += F.softmax_cross_entropy(c2, desc_var)
        l_dis_real += F.softmax_cross_entropy(c22, desc_var)
        l_dis_real /= 4

        l_feature_similarity = F.mean_squared_error(
            l3, l2) + F.mean_squared_error(m_l3, l2)
        l_feature_similarity += F.mean_squared_error(
            l30, l22) + F.mean_squared_error(m_l30, l22)
        l_feature_similarity /= 8

        text_encoding = F.concat(
            (batch_one_hot, objects_hot_var, desc_hot_var), axis=-1)
        text_encoding = F.reshape(text_encoding,
                                  (self.sequence_size, gpu_batch_size, -1))
        z_seq = F.reshape(
            cir_z, (self.sequence_size, gpu_batch_size, self.latent_size))
        z_seq_mix = F.reshape(
            cir_z_m, (self.sequence_size, gpu_batch_size, self.latent_size))
        mdn_loss, _ = self.mdn_models[k](task_encoding=text_encoding[0],
                                         image_encoding=z_seq[:-1],
                                         data_out=joints[1:],
                                         return_sample=False)
        mdn_loss_mix, _ = self.mdn_models[k](task_encoding=text_encoding[0],
                                             image_encoding=z_seq_mix[:-1],
                                             data_out=joints[1:],
                                             return_sample=False)
        robot_loss = (mdn_loss + mdn_loss_mix) / 2

        dis_loss = (l_dis_fake + 10 * l_dis_real) / (gpu_batch_size *
                                                     self.sequence_size)
        loss_classifier = att_classification
        loss_enc = 10 * l_prior + 10 * l_feature_similarity + 10 * att_similarity + 2 * l1_norm_att
        loss_gen = 2 * l_feature_similarity + 200 * reconstruction_loss - dis_loss
        loss_dis = dis_loss

        self.enc_models[k].cleargrads()
        self.att_enc_models[k].cleargrads()
        self.att_gen_models[k].cleargrads()
        self.mdn_models[k].cleargrads()
        loss_net = loss_enc + loss_gen + loss_classifier + robot_loss / 5
        loss_net.backward()

        g1.unchain_backward()
        g2.unchain_backward()
        g3.unchain_backward()
        g4.unchain_backward()
        g5.unchain_backward()
        reconstruction_loss_att.backward()

        cir_x0.unchain_backward()
        cir_m_x0.unchain_backward()
        cir_x00.unchain_backward()
        cir_m_x00.unchain_backward()
        cir_real_x0.unchain_backward()

        self.dis_models[k].cleargrads()
        loss_dis.backward()

        sys.stdout.write(
            '\r' + str(batches_passed) + '/' + str(1000) +
            ' time: {0:0.2f}, enc:{1:0.4f}, gen:{2:0.4f}, dis:{3:0.4f}, l_prior:{4:0.4f}, fea:{5:0.4f}, att_sim:{6:0.4f}, rec:{7:0.4f}, att_rec:{8:0.4f}, att_class:{9:0.4f}, norm:{10:0.4f}, mdn_loss:{11:0.4f}'
            .format(time.time() - batch_start_time, float(loss_enc.data),
                    float(loss_gen.data), float(loss_dis.data),
                    float(l_prior.data), float(l_feature_similarity.data),
                    float(att_similarity.data), float(
                        reconstruction_loss.data),
                    float(reconstruction_loss_att.data),
                    float(att_classification.data), float(l1_norm_att.data),
                    float(robot_loss.data)))
        sys.stdout.flush()  # important
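The interleaved updates above depend on `unchain_backward` to cut the computation graph, so that the discriminator loss cannot push gradients back into the encoder/generator parameters that were just updated. A minimal sketch of that detach pattern, assuming toy `gen`/`dis` links rather than the models in this snippet:

import numpy as np
import chainer.functions as F
import chainer.links as L

gen = L.Linear(4, 4)
dis = L.Linear(4, 1)
z = np.random.randn(2, 4).astype(np.float32)

fake = gen(z)
loss_gen = F.sum(F.softplus(-dis(fake)))  # generator wants dis(fake) high
gen.cleargrads()
loss_gen.backward()                       # gradients flow into gen here

fake.unchain_backward()                   # cut the graph at `fake`, so the
dis.cleargrads()                          # discriminator loss below cannot
loss_dis = F.sum(F.softplus(dis(fake)))   # backprop into gen's parameters
loss_dis.backward()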
Example #48
0
    def original(self, hs, ys):
        '''Decoder forward

        :param Variable hs:
        :param Variable ys:
        :return:
        '''
        self.loss = None
        # prepare input and output word sequences with sos/eos IDs
        eos = self.xp.array([self.eos], 'i')
        sos = self.xp.array([self.sos], 'i')
        ys_in = [F.concat([sos, y], axis=0) for y in ys]
        ys_out = [F.concat([y, eos], axis=0) for y in ys]

        # padding for ys with -1
        # pys: utt x olen
        pad_ys_in = F.pad_sequence(ys_in, padding=self.eos)
        pad_ys_out = F.pad_sequence(ys_out, padding=-1)

        # get dim, length info
        batch = pad_ys_out.shape[0]
        olength = pad_ys_out.shape[1]
        logging.info(self.__class__.__name__ + ' input lengths:  ' + str(self.xp.array([h.shape[0] for h in hs])))
        logging.info(self.__class__.__name__ + ' output lengths: ' + str(self.xp.array([y.shape[0] for y in ys_out])))

        # initialization
        c_list = [None]  # list of cell state of each layer
        z_list = [None]  # list of hidden state of each layer
        for l in six.moves.range(1, self.dlayers):
            c_list.append(None)
            z_list.append(None)
        att_w = None
        z_all = []
        self.att.reset()  # reset pre-computation of h

        # pre-computation of embedding
        eys = self.embed(pad_ys_in)  # utt x olen x zdim
        eys = F.separate(eys, axis=1)

        # loop for an output sequence
        for i in six.moves.range(olength):
            att_c, att_w = self.att(hs, z_list[0], att_w)
            if i > 0 and random.random() < self.sampling_probability:
                logging.info(' scheduled sampling ')
                z_out = self.output(z_all[-1])
                z_out = F.argmax(F.log_softmax(z_out), axis=1)
                z_out = self.embed(z_out)
                ey = F.hstack((z_out, att_c))  # utt x (zdim + hdim)
            else:
                ey = F.hstack((eys[i], att_c))  # utt x (zdim + hdim)
            c_list[0], z_list[0] = self.lstm0(c_list[0], z_list[0], ey)
            for l in six.moves.range(1, self.dlayers):
                c_list[l], z_list[l] = self['lstm%d' % l](c_list[l], z_list[l], z_list[l - 1])
            z_all.append(z_list[-1])

        z_all = F.reshape(F.stack(z_all, axis=1),
                          (batch * olength, self.dunits))
        # compute loss
        y_all = self.output(z_all)
        self.loss = F.softmax_cross_entropy(y_all, F.flatten(pad_ys_out))
        # -1: eos, which is removed in the loss computation
        self.loss *= (np.mean([len(x) for x in ys_in]) - 1)
        acc = F.accuracy(y_all, F.flatten(pad_ys_out), ignore_label=-1)
        logging.info('att loss:' + str(self.loss.data))

        # show predicted character sequence for debug
        if self.verbose > 0 and self.char_list is not None:
            y_hat = F.reshape(y_all, (batch, olength, -1))
            y_true = pad_ys_out
            for (i, y_hat_), y_true_ in zip(enumerate(y_hat.data), y_true.data):
                if i == MAX_DECODER_OUTPUT:
                    break
                idx_hat = self.xp.argmax(y_hat_[y_true_ != -1], axis=1)
                idx_true = y_true_[y_true_ != -1]
                seq_hat = [self.char_list[int(idx)] for idx in idx_hat]
                seq_true = [self.char_list[int(idx)] for idx in idx_true]
                seq_hat = "".join(seq_hat).replace('<space>', ' ')
                seq_true = "".join(seq_true).replace('<space>', ' ')
                logging.info("groundtruth[%d]: " % i + seq_true)
                logging.info("prediction [%d]: " % i + seq_hat)

        if self.labeldist is not None:
            if self.vlabeldist is None:
                self.vlabeldist = chainer.Variable(self.xp.asarray(self.labeldist))
            loss_reg = - F.sum(F.scale(F.log_softmax(y_all), self.vlabeldist, axis=1)) / len(ys_in)
            self.loss = (1. - self.lsm_weight) * self.loss + self.lsm_weight * loss_reg

        return self.loss, acc
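The branch guarded by `self.sampling_probability` above implements scheduled sampling: after the first step, with some probability the decoder consumes its own previous greedy prediction instead of the gold token. A stand-alone sketch of that rule (`output` and `embed` are hypothetical callables standing in for the decoder's projection and embedding layers):

import random
import numpy as np

def choose_decoder_input(i, gold_ey, prev_state, output, embed, sampling_probability):
    # Never sample at step 0; otherwise flip a coin against the schedule.
    if i > 0 and random.random() < sampling_probability:
        logits = output(prev_state)       # (batch, vocab)
        pred = np.argmax(logits, axis=1)  # greedy token ids
        return embed(pred)                # feed the model its own prediction
    return gold_ey                        # teacher forcing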
Example #49
0
 def compute_entropy(self, p):
     if p.ndim == 2:
         return -functions.sum(p * functions.log(p + 1e-16), axis=1)
     return -functions.sum(p * functions.log(p + 1e-16))
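A quick usage sketch (assuming `chainer.functions` is imported as `functions`, as in the snippet): the entropy of a uniform distribution over K classes should come out near log K.

import numpy as np
from chainer import functions

p = functions.softmax(np.zeros((4, 10), dtype=np.float32))  # uniform rows
h = -functions.sum(p * functions.log(p + 1e-16), axis=1)    # per-row entropy
print(h.data)  # each entry is close to log(10) ~= 2.3026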
Example #50
0
    def __call__(self, enc_hs, dec_z, att_prev, scaling=2.0):
        '''AttLoc forward

        :param enc_hs:
        :param dec_z:
        :param att_prev:
        :param scaling:
        :return:
        '''
        batch = len(enc_hs)
        # pre-compute all h outside the decoder loop
        if self.pre_compute_enc_h is None:
            self.enc_h = F.pad_sequence(enc_hs)  # utt x frame x hdim
            self.h_length = self.enc_h.shape[1]
            # utt x frame x att_dim
            self.pre_compute_enc_h = linear_tensor(self.mlp_enc, self.enc_h)

        if dec_z is None:
            dec_z = chainer.Variable(
                self.xp.zeros((batch, self.dunits), dtype=np.float32))
        else:
            dec_z = F.reshape(dec_z, (batch, self.dunits))

        # initialize attention weight with uniform dist.
        if att_prev is None:
            att_prev = [
                self.xp.full(hh.shape[0], 1.0 / hh.shape[0], dtype=np.float32)
                for hh in enc_hs
            ]
            att_prev = [chainer.Variable(att) for att in att_prev]
            att_prev = F.pad_sequence(att_prev)

        # TODO(watanabe) use <chainer variable>.reshape(), instead of F.reshape()
        # att_prev: utt x frame -> utt x 1 x 1 x frame -> utt x att_conv_chans x 1 x frame
        att_conv = self.loc_conv(
            F.reshape(att_prev, (batch, 1, 1, self.h_length)))
        # att_conv: utt x att_conv_chans x 1 x frame -> utt x frame x att_conv_chans
        att_conv = F.swapaxes(F.squeeze(att_conv, axis=2), 1, 2)
        # att_conv: utt x frame x att_conv_chans -> utt x frame x att_dim
        att_conv = linear_tensor(self.mlp_att, att_conv)

        # dec_z_tiled: utt x frame x att_dim
        dec_z_tiled = F.broadcast_to(F.expand_dims(self.mlp_dec(dec_z), 1),
                                     self.pre_compute_enc_h.shape)

        # dot with gvec
        # utt x frame x att_dim -> utt x frame
        # TODO(watanabe) use batch_matmul
        e = F.squeeze(linear_tensor(
            self.gvec,
            F.tanh(att_conv + self.pre_compute_enc_h + dec_z_tiled)),
                      axis=2)
        # NOTE: masking the padded area with a large negative value before the
        # softmax simply degraded performance, so that approach is not used here.
        # Apply scaling to sharpen the attention distribution.
        w = F.softmax(scaling * e)

        # weighted sum over frames
        # utt x hdim
        c = F.sum(self.enc_h *
                  F.broadcast_to(F.expand_dims(w, 2), self.enc_h.shape),
                  axis=1)

        return c, w
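For reference, the scaled softmax and the weighted sum over frames at the end reduce to the following NumPy computation (a sketch with random stand-ins, not the classes above):

import numpy as np

enc_h = np.random.randn(2, 5, 8).astype(np.float32)  # utt x frame x hdim
e = np.random.randn(2, 5).astype(np.float32)         # unnormalized scores
w = np.exp(2.0 * e)                                  # scaling = 2.0
w /= w.sum(axis=1, keepdims=True)                    # softmax over frames
c = (enc_h * w[:, :, None]).sum(axis=1)              # context: utt x hdim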
Example #51
0
 def loss_enc(self, enc, y_real):
     batchsize = len(y_real)
     #G(x, E(x))->1
     loss = F.sum(F.softplus(y_real)) / batchsize
     chainer.report({'loss': loss}, enc)
     return loss
Example #52
0
def sigmoid_cross_entropy(x, z):
    return F.sum(F.relu(x) - x * z + F.log(1 + F.exp(-abs(x))))
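This is the numerically stable form of the logistic loss: for logits x and targets z, max(x, 0) - x*z + log(1 + exp(-|x|)) equals -[z*log(sigmoid(x)) + (1 - z)*log(1 - sigmoid(x))] while avoiding overflow for large |x|. A quick NumPy sanity check of that identity (a sketch, not from the source):

import numpy as np

def stable(x, z):
    return np.sum(np.maximum(x, 0) - x * z + np.log1p(np.exp(-np.abs(x))))

def naive(x, z):
    s = 1.0 / (1.0 + np.exp(-x))
    return -np.sum(z * np.log(s) + (1 - z) * np.log(1 - s))

x = np.random.randn(8) * 3
z = (np.random.rand(8) > 0.5).astype(np.float64)
assert np.allclose(stable(x, z), naive(x, z))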
Example #53
0
    def __call__(self, x, test=False):
        x = self.encode(x, test)

        x = F.sum(x, axis=0) / x.shape[0]
        return F.squeeze(x)
Example #54
0
        dis_total_loss += dis_loss1.data

        dis_loss1.backward()
        optimizer_dis.update()
        optimizer_cla.update()

        #--------------------
        # generated data
        #--------------------
        cla.cleargrads()  # classifier
        dis.cleargrads()  # discriminator
        gen.cleargrads()  # generator
        rcls = np.random.choice([0, 1], bs2)
        x3 = gen(rcls)
        yy3 = F.softmax(cla.fwd(x3))
        cla_loss6 = F.sum(F.matmul(yy3, F.transpose(yy3)))

        cls_total_loss += cla_loss6.data

        cla_loss6.backward()  ## CLS loss 1
        optimizer_cla.update()
        optimizer_gen.update()

        dis.cleargrads()  # discriminator
        gen.cleargrads()  # generator

        x3 = gen(rcls)
        yg0 = rcls.reshape(len(rcls), 1).astype(dtype='float32')
        yx3 = F.hstack([yg0, x3])
        one3 = Variable(np.ones(len(yx3)).astype(dtype='int32'))
        #        dis_loss3  = dis(yx3,one3,train=False)            ## DIS loss 3
Example #55
0
def main():
    try:
        os.mkdir(args.snapshot_directory)
    except FileExistsError:
        pass

    comm = chainermn.create_communicator()
    device = comm.intra_rank
    cuda.get_device(device).use()
    xp = cp

    images = []
    files = os.listdir(args.dataset_path)
    files.sort()
    subset_size = int(math.ceil(len(files) / comm.size))
    files = deque(files)
    files.rotate(-subset_size * comm.rank)
    files = list(files)[:subset_size]
    for filename in files:
        image = np.load(os.path.join(args.dataset_path, filename))
        image = image / 256
        images.append(image)

    print(comm.rank, files)

    images = np.vstack(images)
    images = images.transpose((0, 3, 1, 2)).astype(np.float32)
    train_dev_split = 0.9
    num_images = images.shape[0]
    num_train_images = int(num_images * train_dev_split)
    num_dev_images = num_images - num_train_images
    images_train = images[:num_train_images]

    # To avoid OpenMPI bug
    # multiprocessing.set_start_method("forkserver")
    # p = multiprocessing.Process(target=print, args=("", ))
    # p.start()
    # p.join()

    hyperparams = HyperParameters()
    hyperparams.chz_channels = args.chz_channels
    hyperparams.generator_generation_steps = args.generation_steps
    hyperparams.generator_share_core = args.generator_share_core
    hyperparams.generator_share_prior = args.generator_share_prior
    hyperparams.generator_share_upsampler = args.generator_share_upsampler
    hyperparams.generator_downsampler_channels = args.generator_downsampler_channels
    hyperparams.inference_share_core = args.inference_share_core
    hyperparams.inference_share_posterior = args.inference_share_posterior
    hyperparams.inference_downsampler_channels = args.inference_downsampler_channels
    hyperparams.batch_normalization_enabled = args.enable_batch_normalization
    hyperparams.use_gru = args.use_gru
    hyperparams.no_backprop_diff_xr = args.no_backprop_diff_xr

    if comm.rank == 0:
        hyperparams.save(args.snapshot_directory)
        hyperparams.print()

    if args.use_gru:
        model = GRUModel(hyperparams,
                         snapshot_directory=args.snapshot_directory)
    else:
        model = LSTMModel(hyperparams,
                          snapshot_directory=args.snapshot_directory)
    model.to_gpu()

    optimizer = AdamOptimizer(model.parameters,
                              lr_i=args.initial_lr,
                              lr_f=args.final_lr,
                              beta_1=args.adam_beta1,
                              communicator=comm)
    if comm.rank == 0:
        optimizer.print()

    num_pixels = images.shape[1] * images.shape[2] * images.shape[3]

    dataset = draw.data.Dataset(images_train)
    iterator = draw.data.Iterator(dataset, batch_size=args.batch_size)

    num_updates = 0

    for iteration in range(args.training_steps):
        mean_kld = 0
        mean_nll = 0
        mean_mse = 0
        start_time = time.time()

        for batch_index, data_indices in enumerate(iterator):
            x = dataset[data_indices]
            x += np.random.uniform(0, 1 / 256, size=x.shape)
            x = to_gpu(x)

            z_t_param_array, x_param, r_t_array = model.sample_z_and_x_params_from_posterior(
                x)

            loss_kld = 0
            for params in z_t_param_array:
                mean_z_q, ln_var_z_q, mean_z_p, ln_var_z_p = params
                kld = draw.nn.functions.gaussian_kl_divergence(
                    mean_z_q, ln_var_z_q, mean_z_p, ln_var_z_p)
                loss_kld += cf.sum(kld)

            loss_sse = 0
            for r_t in r_t_array:
                loss_sse += cf.sum(cf.squared_error(r_t, x))

            mu_x, ln_var_x = x_param

            loss_nll = cf.gaussian_nll(x, mu_x, ln_var_x)

            loss_nll /= args.batch_size
            loss_kld /= args.batch_size
            loss_sse /= args.batch_size
            loss = args.loss_beta * loss_nll + loss_kld + args.loss_alpha * loss_sse

            model.cleargrads()
            loss.backward(loss_scale=optimizer.loss_scale())
            optimizer.update(num_updates, loss_value=float(loss.array))

            num_updates += 1
            mean_kld += float(loss_kld.data)
            mean_nll += float(loss_nll.data)
            mean_mse += float(loss_sse.data) / num_pixels / (
                hyperparams.generator_generation_steps - 1)

            printr(
                "Iteration {}: Batch {} / {} - loss: nll_per_pixel: {:.6f} - mse: {:.6f} - kld: {:.6f} - lr: {:.4e}"
                .format(
                    iteration + 1, batch_index + 1, len(iterator),
                    float(loss_nll.data) / num_pixels + math.log(256.0),
                    float(loss_sse.data) / num_pixels /
                    (hyperparams.generator_generation_steps - 1),
                    float(loss_kld.data), optimizer.learning_rate))

            if comm.rank == 0 and batch_index > 0 and batch_index % 100 == 0:
                model.serialize(args.snapshot_directory)

        if comm.rank == 0:
            model.serialize(args.snapshot_directory)

        if comm.rank == 0:
            elapsed_time = time.time() - start_time
            print(
                "\r\033[2KIteration {} - loss: nll_per_pixel: {:.6f} - mse: {:.6f} - kld: {:.6f} - lr: {:.4e} - elapsed_time: {:.3f} min"
                .format(
                    iteration + 1,
                    mean_nll / len(iterator) / num_pixels + math.log(256.0),
                    mean_mse / len(iterator), mean_kld / len(iterator),
                    optimizer.learning_rate, elapsed_time / 60))
Example #56
0
 def square_norm(x, y):
     # NOTE: `batchsize` is captured from the enclosing scope.
     return F.sum((F.log(x) - F.log(y))**2) / batchsize
Example #57
0
    def __call__(self, x):
        # Compute parameters for q(z|x, a)
        encoding_time_1 = time.time()
        qmu_a, qln_var_a = self.encode_a(x)
        encoding_time_1 = float(time.time() - encoding_time_1)

        a_enc = F.gaussian(qmu_a, qln_var_a)

        encoding_time_2 = time.time()
        qmu_z, qln_var_z = self.encode_z(x, a_enc)
        encoding_time_2 = float(time.time() - encoding_time_2)

        encoding_time = encoding_time_1 + encoding_time_2

        decoding_time_average = 0.

        self.kl = 0
        self.logp = 0

        logp_a_xz = 0
        logp_x_z = 0
        logp_z = 0
        logq_a_x = 0
        logq_z_ax = 0

        current_temperature = min(self.temperature['value'], 1.0)
        self.temperature['value'] += self.temperature['increment']

        for j in xrange(self.num_zsamples):
            # z ~ q(z|x, a)
            z = F.gaussian(qmu_z, qln_var_z)

            # Compute p(x|z)
            decoding_time = time.time()
            pmu_a, pln_var_a = self.decode_a(z, x)
            p_ber_prob_logit = self.decode(z)
            decoding_time = time.time() - decoding_time
            decoding_time_average += decoding_time

            logp_a_xz += gaussian_logp(a_enc, pmu_a, pln_var_a)
            logp_x_z += bernoulli_logp(x, p_ber_prob_logit)
            logp_z += current_temperature * gaussian_logp0(z)
            logq_a_x += gaussian_logp(a_enc, qmu_a, qln_var_a)
            logq_z_ax += current_temperature * gaussian_logp(
                z, qmu_z, qln_var_z)

        logp_a_xz /= self.num_zsamples
        logp_x_z /= self.num_zsamples
        logp_z /= self.num_zsamples
        logq_a_x /= self.num_zsamples
        logq_z_ax /= self.num_zsamples

        decoding_time_average /= self.num_zsamples

        self.obj_batch = logp_a_xz + logp_x_z + logp_z - logq_a_x - logq_z_ax
        self.kl = logq_z_ax - logp_z
        self.logp = logp_x_z

        self.timing_info = np.array([encoding_time, decoding_time_average])

        batch_size = self.obj_batch.shape[0]

        self.obj = -F.sum(self.obj_batch) / batch_size

        return self.obj
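The batch objective assembled above is a Monte Carlo estimate of the auxiliary-variable ELBO; written out (a reconstruction from the code, not quoted from the source):

\mathcal{L}(x) = \mathbb{E}_{q(a \mid x)\, q(z \mid a, x)}\big[\log p(a \mid x, z) + \log p(x \mid z) + \log p(z) - \log q(a \mid x) - \log q(z \mid a, x)\big]

`self.obj` is the negative batch mean of this quantity, with `current_temperature` annealing the prior and posterior terms.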
Example #58
0
        channel_observed = get_normalized_image_variable(t + dt, w)
        if channel_observed is None:
            no_image = True
            continue
        channel_observeds.append(channel_observed)

    if no_image:
        continue

    img_input = F.concat(channel_inputs)
    img_observed = F.concat(channel_observeds)

    img_predicted = predictor(img_input)

    loss = F.sum(abs(img_predicted - img_observed))
    predictor.cleargrads()
    loss.backward()
    optimizer_p.update()
    """
    Train the generator and discriminator
    """
    t2 = t
    no_missing_image = True
    img_forecast = img_input
    if epoch >= start_dcgan_at_epoch:
        for i in range(1, 7):
            t2 = t + i * dt
            img_forecast = predictor(img_forecast)

            channel_futures = []
Example #59
0
 def loss_gen(self, gen, y_fake):
     batchsize = len(y_fake)
     loss = F.sum(-y_fake) / batchsize
     chainer.reporter.report({'loss': loss}, gen)
     return loss
Example #60
0
    def forward(self, xs, ys):
        batch = len(xs)
        xs = [xp.array(x[::-1]) for x in xs]
        exs = sequence_embed(self.embed_x, xs)
        # None represents a zero vector in an encoder.
        hx, cx, xs_states = self.encoder(None, None, exs)
        hx = F.reshape(F.transpose(hx, (1, 0, 2)),
                       (batch, self.n_layers, self.n_units * 2))
        cx = F.reshape(F.transpose(cx, (1, 0, 2)),
                       (batch, self.n_layers, self.n_units * 2))
        hx = [d for d in hx]
        cx = [d for d in cx]

        evs = [self.embed_y(xp.array([i])) for i in range(self.embed_y_size)]

        concat_ys_outs = [[] for _ in ys]
        #print(len(ys))

        att_os = []
        for i, (y, hxs) in enumerate(zip(ys, xs_states)):
            concat_oss = []

            def rec_LSTM(node, eidx, nhxncx):
                nonlocal i
                (nhx, ncx) = nhxncx
                ntype, nchoice, children = node
                #eidx = self.embed_idx[ntype][ppos]
                ev = evs[eidx]
                #print(ev.shape)
                #print(nhx.shape,ncx.shape)
                thx, tcx, nos = self.decoder(nhx, ncx, [ev])
                nos = nos[0]
                #wnos = self.W[ntype](nos)
                if len(self.trans_data[ntype]) > 1:
                    concat_oss.append(nos)
                    concat_ys_outs[i].append(self.choice_idx[ntype][nchoice])
                # otherwise, we don't have to train.

                for j, ch in enumerate(children):
                    #print(ntype,nchoice,i)
                    teidx = self.embed_idx[ntype][nchoice][j]
                    rec_LSTM(ch, teidx, (thx, tcx))

            nhx, ncx = hx[i], cx[i]
            ncx = F.reshape(ncx, (ncx.shape[0], 1, ncx.shape[1]))
            nhx = F.reshape(nhx, (nhx.shape[0], 1, nhx.shape[1]))

            assert y[0] == self.type_size - 1
            ridx = self.embed_root_idx
            rec_LSTM(y, ridx, (nhx, ncx))

            #print(concat_oss[0].shape,len(concat_oss))
            yh = F.concat(concat_oss, axis=0)
            #print(yh.shape)
            ch = self.att(xs_states[i], yh)
            att_os.append(F.tanh(self.Wc(F.concat([ch, yh], axis=1))))

        concat_os = F.concat(att_os, axis=0)
        concat_ys_out = list(map(lambda d: xp.array(d), concat_ys_outs))
        concat_ys_out = F.concat(concat_ys_out, axis=0)
        loss = F.sum(
            F.softmax_cross_entropy(
                self.Ws(concat_os), concat_ys_out, reduce='no')) / batch

        chainer.report({'loss': loss}, self)
        #print(loss)
        return loss