Example #1
 def __call__(self, y, hiddens=None, scale=True):
     ne_loss = 0
     
     # NE for hiddens
     if hiddens is not None:
         for h in hiddens:
             h_normalized = F.softmax(h)
             h_log_softmax = F.log_softmax(h)
             n = h.data.shape[0]
             l = - F.sum(h_normalized * h_log_softmax) / n 
             if scale:
                 d = np.prod(h.data.shape[1:])
                 l = l / d
             ne_loss += l
             
     # NE for output
     y_normalized = F.softmax(y)
     y_log_softmax = F.log_softmax(y)
     n = y.data.shape[0]
     l = - F.sum(y_normalized * y_log_softmax) / n 
     if scale:
         d = np.prod(y.data.shape[1:])
         l = l / d
     ne_loss += l
     return ne_loss
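A minimal standalone sketch (not from the source) of the per-output entropy term computed above, applied to random logits; `entropy_term` is an illustrative helper name.

import numpy as np
import chainer.functions as F
from chainer import Variable

def entropy_term(y, scale=True):
    # H(p) averaged over the batch, with p = softmax(y); mirrors one term of ne_loss above
    p = F.softmax(y)
    log_p = F.log_softmax(y)
    loss = -F.sum(p * log_p) / y.shape[0]
    if scale:
        loss = loss / float(np.prod(y.shape[1:]))
    return loss

logits = Variable(np.random.randn(8, 10).astype(np.float32))
print(entropy_term(logits))  # scalar Variable, non-negative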
Example #2
def dirichlet_likelihood(weights, alpha=None):
    """ Calculate the log likelihood of the observed topic proportions.
    A higher (less negative) log likelihood means the observed proportions
    are more probable under the Dirichlet prior.

    Args:
        weights (chainer.Variable): Unnormalized weight vector. The vector
            will be passed through a softmax function that will map the input
            onto a probability simplex.
        alpha (float): The Dirichlet concentration parameter. Alpha
            greater than 1.0 results in very dense topic weights such
            that each document belongs to many topics. Alpha < 1.0 results
            in sparser topic weights. The default is to set alpha to
            1.0 / n_topics, effectively enforcing the prior belief that a
            document belongs to only a few topics at once.

    Returns:
        ~chainer.Variable: Output loss variable.
    """
    if type(weights) is Variable:
        n_topics = weights.data.shape[1]
    else:
        n_topics = weights.W.data.shape[1]
    if alpha is None:
        alpha = 1.0 / n_topics
    if type(weights) is Variable:
        log_proportions = F.log_softmax(weights)
    else:
        log_proportions = F.log_softmax(weights.W)
    loss = (alpha - 1.0) * log_proportions
    return -F.sum(loss)
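A small usage sketch for the function above (shapes and values are illustrative, not from the source); the returned Variable is a prior penalty that can be added to a model's loss.

import numpy as np
import chainer.functions as F
from chainer import Variable

weights = Variable(np.random.randn(4, 3).astype(np.float32))      # 4 documents, 3 topics
prior = dirichlet_likelihood(weights)                    # alpha defaults to 1 / n_topics
prior_sparse = dirichlet_likelihood(weights, alpha=0.1)  # stronger sparsity pressure
print(prior.data, prior_sparse.data)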
Example #3
def beam_search(dec,state,y,data,beam_width,mydict_inv):  
    beam_width=beam_width
    xp=cuda.cupy
    batchsize=data.shape[0]
    vocab_size=len(mydict_inv)
    topk=20
    route = np.zeros((batchsize,beam_width,50)).astype(np.int32)
    
    for j in range(50):
        if j == 0:
            y = Variable(xp.array(np.argmax(y.data.get(), axis=1)).astype(xp.int32))
            state,y = dec(y, state, train=False)
            h=state['h1'].data
            c=state['c1'].data
            h=xp.tile(h.reshape(batchsize,1,-1), (1,beam_width,1))
            c=xp.tile(c.reshape(batchsize,1,-1), (1,beam_width,1))
            ptr=F.log_softmax(y).data.get()
            pred_total_city = np.argsort(ptr)[:,::-1][:,:beam_width]
            pred_total_score = np.sort(ptr)[:,::-1][:,:beam_width]
            route[:,:,j] = pred_total_city
            pred_total_city=pred_total_city.reshape(batchsize,beam_width,1)
        else:
            pred_next_score=np.zeros((batchsize,beam_width,topk))
            pred_next_city=np.zeros((batchsize,beam_width,topk)).astype(np.int32)
            score2idx=np.zeros((batchsize,beam_width,topk)).astype(np.int32)
            for b in range(beam_width):
                state={'c1':Variable(c[:,b,:]), 'h1':Variable(h[:,b,:])}
                cur_city = xp.array([pred_total_city[i,b,j-1] for i in range(batchsize)]).astype(xp.int32)
                state,y = dec(cur_city,state, train=False)
                h[:,b,:]=state['h1'].data
                c[:,b,:]=state['c1'].data
                ptr=F.log_softmax(y).data.get()
                pred_next_score[:,b,:]=np.sort(ptr, axis=1)[:,::-1][:,:topk]
                pred_next_city[:,b,:]=np.argsort(ptr, axis=1)[:,::-1][:,:topk]

            h=F.stack([h for i in range(topk)], axis=2).data
            c=F.stack([c for i in range(topk)], axis=2).data
            
            pred_total_city = np.tile(route[:,:,:j],(1,1,topk)).reshape(batchsize,beam_width,topk,j)
            pred_next_city = pred_next_city.reshape(batchsize,beam_width,topk,1)
            pred_total_city = np.concatenate((pred_total_city,pred_next_city),axis=3)

            pred_total_score = np.tile(pred_total_score.reshape(batchsize,beam_width,1),(1,1,topk)).reshape(batchsize,beam_width,topk,1)
            pred_next_score = pred_next_score.reshape(batchsize,beam_width,topk,1)
            pred_total_score += pred_next_score

            idx = pred_total_score.reshape(batchsize,beam_width * topk).argsort(axis=1)[:,::-1][:,:beam_width]

            pred_total_city = pred_total_city[:,idx//topk, np.mod(idx,topk), :][np.diag_indices(batchsize,ndim=2)].reshape(batchsize,beam_width,j+1)
            pred_total_score = pred_total_score[:,idx//topk, np.mod(idx,topk), :][np.diag_indices(batchsize,ndim=2)].reshape(batchsize,beam_width,1)
            h = h[:,idx//topk, np.mod(idx,topk), :][np.diag_indices(batchsize,ndim=2)].reshape(batchsize,beam_width,-1)
            c = c[:,idx//topk, np.mod(idx,topk), :][np.diag_indices(batchsize,ndim=2)].reshape(batchsize,beam_width,-1)

            route[:,:,:j+1] =pred_total_city
            if (pred_total_city[:,:,j] == 15).all():
                break


    return route[:,0,:j+1].tolist()
Example #4
    def __call__(self, y, t):
        t_normalized = F.softmax(t)
        t_log_softmax = F.log_softmax(t)
        y_log_softmax = F.log_softmax(y)
        n = y.data.shape[0]

        return F.sum((t_normalized * t_log_softmax) \
                         - (t_normalized * y_log_softmax)) / n
Example #5
    def kl_div(self, other):
        logli = F.log_softmax(self.logits)
        other_logli = F.log_softmax(other.logits)

        # new_prob_var = new_dist_info_vars["prob"]
        # Assume layout is N * A
        return F.sum(
            F.exp(logli) * (logli - other_logli),
            axis=-1
        )
Example #6
    def __call__(self, y0, y1):
        bs = y0.data.shape[0]
        d = np.prod(y0.data.shape[1:])

        y0_softmax = F.softmax(y0)
        y1_softmax = F.softmax(y1)

        y0_log_softmax = F.log_softmax(y0)
        y1_log_softmax = F.log_softmax(y1)

        kl0 = F.sum(y0_softmax * (y0_log_softmax - y1_log_softmax)) / bs / d
        kl1 = F.sum(y1_softmax * (y1_log_softmax - y0_log_softmax)) / bs / d

        return (kl0 + kl1) / 2
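A quick sanity check of the symmetric KL above, rewritten as a standalone function (a sketch, not the author's code): identical logits give roughly zero, and any mismatch gives a positive value.

import numpy as np
import chainer.functions as F

def symmetric_kl(y0, y1):
    bs = y0.shape[0]
    d = float(np.prod(y0.shape[1:]))
    p0, p1 = F.softmax(y0), F.softmax(y1)
    lp0, lp1 = F.log_softmax(y0), F.log_softmax(y1)
    kl0 = F.sum(p0 * (lp0 - lp1)) / bs / d
    kl1 = F.sum(p1 * (lp1 - lp0)) / bs / d
    return (kl0 + kl1) / 2

x = np.random.randn(4, 5).astype(np.float32)
print(symmetric_kl(x, x).data)                      # ~0.0
print(symmetric_kl(x, np.roll(x, 1, axis=1)).data)  # > 0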
Example #7
 def logli(self, a):
     all_logli = F.log_softmax(self.logits)
     N = len(a)
     return all_logli[
         np.arange(N),
         a.data.astype(np.int32, copy=False)
     ]
Example #8
    def __forward(self, batch_x, batch_t, weight, train=True):
        xp = self.xp
        x = Variable(xp.asarray(batch_x), volatile=not train)
        t = Variable(xp.asarray(batch_t), volatile=not train)
        y = self.net(x, train=train)

        b, c, n = y.data.shape
        mask = Variable(xp.asarray(np.broadcast_to(weight.reshape(-1, 1, 1), (b, c, n)) * loss_mask(batch_t, self.net.rating_num)), volatile=not train)
        if self.ordinal_weight == 0:
            loss = F.sum(-F.log_softmax(y) * mask) / b
        elif self.ordinal_weight == 1:
            loss = ordinal_loss(y, mask)
        else:
            loss = (1 - self.ordinal_weight) * F.sum(-F.log_softmax(y) * mask) / b + self.ordinal_weight * ordinal_loss(y, mask)

        acc = self.__accuracy(y, t)
        return loss, acc
Example #9
    def __call__(self, y, ):
        bs = y.data.shape[0]
        d = np.prod(y.data.shape[1])
        if len(y.shape) > 2:
            s = np.prod(y.data.shape[2:])

            y = F.reshape(y, (bs, d, s))
            y = F.transpose(y, (0, 2, 1))

            y_normalized = F.softmax(y, use_cudnn=False)
            y_log_softmax = F.log_softmax(y, use_cudnn=False)
            self.loss = - F.sum(y_normalized * y_log_softmax) / bs / s
        else:
            y_normalized = F.softmax(y)
            y_log_softmax = F.log_softmax(y)
            self.loss = - F.sum(y_normalized * y_log_softmax) / bs / d

        return self.loss
Example #10
    def __call__(self, y, ):
        bs = y.data.shape[0]
        d = np.prod(y.data.shape[1:])

        y_normalized = F.softmax(y)
        y_log_softmax = F.log_softmax(y)
        self.loss = - F.sum(y_normalized * y_log_softmax) / bs / d

        return self.loss
Example #11
    def check_forward(self, x_data, use_cudnn=True):
        x = chainer.Variable(x_data)
        y = functions.log_softmax(x, use_cudnn)
        self.assertEqual(y.data.dtype, numpy.float32)

        log_z = numpy.ufunc.reduce(
            numpy.logaddexp, self.x, axis=1, keepdims=True)
        y_expect = self.x - log_z

        gradient_check.assert_allclose(y_expect, y.data)
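The test above relies on the identity log_softmax(x) = x - logsumexp(x, axis=1); the same check can be reproduced outside the test harness (a sketch, assuming NumPy inputs):

import numpy as np
import chainer.functions as F

x = np.random.randn(3, 7).astype(np.float32)
y = F.log_softmax(x)

log_z = np.logaddexp.reduce(x, axis=1, keepdims=True)  # logsumexp per row
np.testing.assert_allclose(y.data, x - log_z, rtol=1e-5, atol=1e-5)
# rows of exp(log_softmax(x)) are valid probability distributions
np.testing.assert_allclose(np.exp(y.data).sum(axis=1), np.ones(3, np.float32), rtol=1e-5)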
Example #12
 def forward(self, ids, bow):
     bow, ids = utils.move(self.xp, bow, ids)
     proportions = self.proportions(ids)
     ld = dirichlet_likelihood(proportions)
     doc = F.matmul(F.softmax(proportions), self.factors())
     logp = F.dropout(self.embedding(doc))
     # loss = -F.sum(bow * F.log_softmax(logp))
     sources, targets, counts = [], [], []
     lpi =  F.sum(bow * F.log_softmax(logp), axis=1)
     loss = -F.sum(lpi)
     return loss, ld
Example #13
    def check_forward(self, x_data, use_cudnn='always'):
        x = chainer.Variable(x_data)
        with chainer.using_config('use_cudnn', use_cudnn):
            y = functions.log_softmax(x)
        self.assertEqual(y.data.dtype, self.dtype)

        log_z = numpy.ufunc.reduce(
            numpy.logaddexp, self.x, axis=1, keepdims=True)
        y_expect = self.x - log_z

        testing.assert_allclose(
            y_expect, y.data, **self.check_forward_options)
Example #14
    def predict(self, state, x):
        """Predict log probabilities for given state and input x using the predictor

        :param state : the state
        :param x : the input
        :return a tuple (state, log prob vector)
        :rtype cupy/numpy array
        """
        if hasattr(self.predictor, 'normalized') and self.predictor.normalized:
            return self.predictor(state, x)
        else:
            state, z = self.predictor(state, x)
            return state, F.log_softmax(z).data
Example #15
    def __call__(self, o):
        log_pi = F.relu(self.h1(o))
        log_pi = F.relu(self.h2(log_pi))
        log_pi = F.log_softmax(self.h3(log_pi))
        probs = F.exp(log_pi)[0]

        # avoid "ValueError: sum(pvals[:-1]) > 1.0" in numpy.multinomial
        diff = sum(probs.data[:-1]) - 1
        if diff > 0:
            probs -= (diff + EPS) / A_DIM

        a = np.random.multinomial(1, probs.data).astype(np.float32)
        return log_pi, a
Example #16
    def check_forward(self, x_data, use_cudnn='always'):
        x = chainer.Variable(x_data)
        with chainer.using_config('use_cudnn', use_cudnn):
            y = functions.log_softmax(x)
        self.assertEqual(y.data.dtype, self.dtype)

        log_z = numpy.ufunc.reduce(numpy.logaddexp,
                                   self.x,
                                   axis=1,
                                   keepdims=True)
        y_expect = self.x - log_z

        testing.assert_allclose(y_expect, y.data, **self.check_forward_options)
Example #17
    def __forward(self, batch_x, batch_t, weight, train=True):
        xp = self.xp
        x = Variable(xp.asarray(batch_x), volatile=not train)
        t = Variable(xp.asarray(batch_t), volatile=not train)
        y = self.net(x, train=train)

        b, c, n = y.data.shape
        mask = Variable(xp.asarray(
            np.broadcast_to(weight.reshape(-1, 1, 1), (b, c, n)) *
            loss_mask(batch_t, self.net.rating_num)),
                        volatile=not train)
        if self.ordinal_weight == 0:
            loss = F.sum(-F.log_softmax(y) * mask) / b
        elif self.ordinal_weight == 1:
            loss = ordinal_loss(y, mask)
        else:
            loss = (1 - self.ordinal_weight) * F.sum(
                -F.log_softmax(y) *
                mask) / b + self.ordinal_weight * ordinal_loss(y, mask)

        acc = self.__accuracy(y, t)
        return loss, acc
Example #18
    def __call__(self, x, t, qt=None):
        # forward
        z = self.enc(x)
        e = self.vq(z)
        e_ = self.vq(chainer.Variable(z.data))
        scale = t.shape[2] // e.shape[2]
        if self.quantize == 'mulaw':
            y_hat = self.dec(qt, F.unpooling_2d(e, (scale, 1),
                                                cover_all=False))
        elif self.quantize == 'mixture':
            y_hat = self.dec(x, F.unpooling_2d(e, (scale, 1), cover_all=False))

        # calculate loss
        if self.quantize == 'mulaw':
            loss1 = F.softmax_cross_entropy(y_hat, t)
        elif self.quantize == 'mixture':
            y_hat = y_hat[:, :30]
            logit_probs, means, log_scales = F.split_axis(y_hat, 3, 1)
            log_scales = F.relu(log_scales + 7) - 7
            y = F.broadcast_to(t, means.shape)

            centered_y = y - means
            inv_stdv = F.exp(-log_scales)
            plus_in = inv_stdv * (centered_y + 1 / (2**16))
            cdf_plus = F.sigmoid(plus_in)
            min_in = inv_stdv * (centered_y - 1 / (2**16))
            cdf_min = F.sigmoid(min_in)

            log_cdf_plus = plus_in - F.softplus(plus_in)
            log_one_minus_cdf_min = -F.softplus(min_in)

            cdf_delta = cdf_plus - cdf_min
            cdf_delta = F.relu(cdf_delta - 1e-12) + 1e-12

            y = F.broadcast_to(t, log_cdf_plus.shape).array
            log_probs = F.where(
                y < -0.999, log_cdf_plus,
                F.where(y > 0.999, log_one_minus_cdf_min, F.log(cdf_delta)))
            log_probs = log_probs + F.log_softmax(logit_probs)
            loss1 = -F.mean(log_probs)
        loss2 = F.mean((chainer.Variable(z.data) - e_)**2)
        loss3 = self.beta * F.mean((z - chainer.Variable(e.data))**2)
        loss = loss1 + loss2 + loss3
        chainer.reporter.report(
            {
                'loss1': loss1,
                'loss2': loss2,
                'loss3': loss3,
                'loss': loss
            }, self)
        return loss1, loss2, loss3
Example #19
 def wer_fun(model, testFeat, normalizeBias):
     global args
     # Use decode test data to forward network
     temp = E.KaldiDict()
     print('(testing) Forward network', end=" " * 20 + '\r')
     with chainer.using_config('train', False), chainer.no_backprop_mode():
         for utt in testFeat.keys():
             data = cp.array(testFeat[utt], dtype=cp.float32)
             out1, out2 = model(data)
             out = F.log_softmax(out1, axis=1)
             out.to_cpu()
             temp[utt] = out.array - normalizeBias
     # Transform KaldiDict to KaldiArk format
     print('(testing) Transform to ark', end=" " * 20 + '\r')
     amp = temp.ark
     # Decoding to obtain a lattice
     hmm = args.TIMITpath + '/exp/dnn4_pretrain-dbn_dnn_ali_test/final.mdl'
     hclg = args.TIMITpath + '/exp/tri3/graph/HCLG.fst'
     lexicon = args.TIMITpath + '/exp/tri3/graph/words.txt'
     print('(testing) Generate Lattice', end=" " * 20 + '\r')
     lattice = E.decode_lattice(amp, hmm, hclg, lexicon, args.minActive,
                                args.maxActive, args.maxMemory, args.beam,
                                args.latBeam, args.acwt)
     # Sweep the language-model weight from minLmwt to maxLmwt and get the 1-best words.
     print('(testing) Get 1-best words', end=" " * 20 + '\r')
     outs = lattice.get_1best(lmwt=args.minLmwt,
                              maxLmwt=args.maxLmwt,
                              outFile=args.outDir + '/outRaw.txt')
     # If the reference file does not exist, create it.
     phonemap = args.TIMITpath + '/conf/phones.60-48-39.map'
     outFilter = args.TIMITpath + '/local/timit_norm_trans.pl -i - -m {} -from 48 -to 39'.format(
         phonemap)
     if not os.path.isfile(args.outDir + '/test_filt.txt'):
         refText = args.TIMITpath + '/data/test/text'
         cmd = 'cat {} | {} > {}/test_filt.txt'.format(
             refText, outFilter, args.outDir)
         (_, _) = E.run_shell_cmd(cmd)
     # Score WER and find the smallest one.
     print('(testing) Score', end=" " * 20 + '\r')
     minWER = None
     for k in range(args.minLmwt, args.maxLmwt + 1, 1):
         cmd = 'cat {} | {} > {}/test_prediction_filt.txt'.format(
             outs[k], outFilter, args.outDir)
         (_, _) = E.run_shell_cmd(cmd)
         os.remove(outs[k])
         score = E.wer('{}/test_filt.txt'.format(args.outDir),
                       "{}/test_prediction_filt.txt".format(args.outDir),
                       mode='all')
         if minWER == None or score['WER'] < minWER:
             minWER = score['WER']
     return minWER
Example #20
def dirichlet_likelihood(weights, alpha=None):
    """ Calculate the log likelihood of the observed topic proportions.
    A higher (less negative) log likelihood means the observed proportions
    are more probable under the Dirichlet prior.

    Args:
        weights (chainer.Variable): Unnormalized weight vector. The vector
            will be passed through a softmax function that will map the input
            onto a probability simplex.
        alpha (float): The Dirichlet concentration parameter. Alpha
            greater than 1.0 results in very dense topic weights such
            that each document belongs to many topics. Alpha < 1.0 results
            in sparser topic weights. The default is to set alpha to
            1.0 / n_topics, effectively enforcing the prior belief that a
            document belongs to only a few topics at once.

    Returns:
        ~chainer.Variable: Output loss variable.
    """
    if type(weights) is Variable:
        n_topics = weights.data.shape[1]
    else:
        n_topics = weights.W.data.shape[1]
    # logger.info('dirichlet_likelihood on topics of {}'.format(n_topics))

    if alpha is None:
        alpha = 1.0 / n_topics

    if type(weights) is Variable:
        log_proportions = F.log_softmax(weights)
    else:
        log_proportions = F.log_softmax(weights.W)

    # positive
    loss = (alpha - 1.0) * log_proportions
    # negative
    # return -F.sum(loss)

    return F.sum(loss)
Example #21
    def __call__(
        self,
        y,
    ):
        bs = y.data.shape[0]
        d = np.prod(y.data.shape[1:])

        y = F.reshape(y, (bs, d))

        y_normalized = F.softmax(y)
        y_log_softmax = F.log_softmax(y)
        self.loss = -F.sum(y_normalized * y_log_softmax) / bs / d

        return self.loss
Example #22
 def decode(self, sample, bow):
     """ Decode latent document vectors back into word counts
     (n_docs, n_vocab).
     """
     logprob = F.log_softmax(self.embedding(sample))
     # This is equivalent to a softmax_cross_entropy where instead of
     # guessing 1 of N words we have repeated observations
     # Normal softmax for guessing the next word is:
     # t log softmax(x), where t is 0 or 1
     # Softmax for guessing word counts is simply doing
     # the above more times, so multiply by the count
     # count log softmax(x)
     loss = -F.sum(bow * logprob)
     return loss
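The comment above says the bag-of-words loss is just softmax cross entropy with each word repeated `count` times; a small standalone sketch of that equivalence (hypothetical logits and counts, not from the source):

import numpy as np
import chainer.functions as F

logits = np.random.randn(1, 6).astype(np.float32)        # one document, vocab size 6
bow = np.array([[3, 0, 1, 0, 2, 0]], dtype=np.float32)   # word counts

# count-weighted form used in decode()
loss_bow = -F.sum(bow * F.log_softmax(logits))

# equivalent: one softmax_cross_entropy term per observed word occurrence
targets = np.repeat(np.arange(6), bow[0].astype(np.int32)).astype(np.int32)
loss_rep = F.softmax_cross_entropy(
    np.repeat(logits, len(targets), axis=0), targets, reduce='no')
print(loss_bow.data, F.sum(loss_rep).data)  # should agree up to float error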
Example #23
    def log_softmax(self, hs):
        """Log_softmax of frame activations.

        Args:
            hs (list of chainer.Variable | N-dimension array):
                Input variable from encoder.

        Returns:
            chainer.Variable: An n-dimensional float array.

        """
        y_hat = self.ctc_lo(F.pad_sequence(hs), n_batch_axes=2)
        return F.log_softmax(y_hat.reshape(-1, y_hat.shape[-1])).reshape(
            y_hat.shape)
Example #24
    def output_and_loss(self, concat_logit_block, t_block, batch, length):
        # Output (all together at once for efficiency)
        rebatch, _ = concat_logit_block.shape
        # Make target
        concat_t_block = t_block.reshape((rebatch)).data
        ignore_mask = (concat_t_block >= 0)
        n_token = ignore_mask.sum()
        normalizer = n_token if self.normalize_length else batch
        if not self.use_label_smoothing:
            loss = F.softmax_cross_entropy(concat_logit_block, concat_t_block)
            loss = loss * n_token / normalizer
        else:
            p_lsm = self.lsm_weight
            p_loss = 1. - p_lsm
            log_prob = F.log_softmax(concat_logit_block)
            broad_ignore_mask = self.xp.broadcast_to(ignore_mask[:, None],
                                                     concat_logit_block.shape)
            pre_loss = ignore_mask * \
                log_prob[self.xp.arange(rebatch), concat_t_block]
            loss = -F.sum(pre_loss) / normalizer
            label_smoothing = broad_ignore_mask * \
                - 1. / self.n_target_vocab * log_prob
            label_smoothing = F.sum(label_smoothing) / normalizer
            loss = p_loss * loss + p_lsm * label_smoothing
        accuracy = F.accuracy(concat_logit_block,
                              concat_t_block,
                              ignore_label=-1)

        if self.verbose > 0 and self.char_list is not None:
            with chainer.no_backprop_mode():
                rc_block = F.transpose(
                    concat_logit_block.reshape((batch, length, -1)), (0, 2, 1))
                rc_block.to_cpu()
                t_block.to_cpu()
                for (i, y_hat_), y_true_ in zip(enumerate(rc_block.data),
                                                t_block.data):
                    if i == MAX_DECODER_OUTPUT:
                        break
                    idx_hat = np.argmax(y_hat_[:, y_true_ != -1], axis=0)
                    idx_true = y_true_[y_true_ != -1]
                    eos_true = np.where(y_true_ == self.eos)[0][0]
                    seq_hat = [self.char_list[int(idx)] for idx in idx_hat]
                    seq_true = [
                        self.char_list[int(idx)] for idx in idx_true[:eos_true]
                    ]
                    seq_hat = "".join(seq_hat).replace('<space>', ' ')
                    seq_true = "".join(seq_true).replace('<space>', ' ')
                    logging.info("groundtruth[%d]: " % i + seq_true)
                    logging.info("prediction [%d]: " % i + seq_hat)
        return loss, accuracy
Example #25
    def __call__(self, x, t):
        y_list = self.predictor(x)
        _len, _cls = y_list.shape
        if self.sm_fuse:
            _sm = F.reshape(F.log_softmax(y_list), (self.n_kernel, _len // self.n_kernel, _cls))
            ave_y = F.average(_sm, axis=0)
            loss = - F.average(F.select_item(ave_y, t))
        else:
            loss = F.average(F.softmax_cross_entropy(y_list, F.tile(t, self.n_kernel)))

        conf = F.average(
            F.reshape(y_list, (self.n_kernel, _len // self.n_kernel, _cls)), axis=0)
        chainer.report(
            {'loss': loss, 'accuracy': F.accuracy(conf, t)}, self)
        return loss
Example #26
    def __call__(self, x_recon, x, enc_hiddens, dec_hiddens, scale=True):
        """
        Parameters
        -----------------
        x_recon: reconstructed Variable
        x: original Variable used as the label
        enc_hiddens: list of Variable
        dec_hiddens: list of Variable
        """
        kl_recon_loss = 0

        # Lateral Recon Loss
        if self.rc and enc_hiddens is not None:
            for h0, h1 in zip(enc_hiddens[::-1], dec_hiddens):
                n = h0.shape[0]
                d = np.prod(h0.shape[1:])
                p = F.softmax(h0)
                log_p = F.log_softmax(h0)
                log_q = F.log_softmax(h1)
                l = F.sum(p * (log_p - log_q)) / n / d
                kl_recon_loss += l

        self.loss = kl_recon_loss
        return self.loss
Example #27
    def __call__(self, states, plies, res, ply_num, train=True):
        sum_loss = 0
        for i in range(len(states)):

            x = chainer.Variable(self.xp.array([states[i][j] for j in range(ply_num[i])], 'float32'))
            scores = self.predict(x, train)

            log_prob = F.log_softmax(scores)  # (batch_size, vocab_size)
            loss = 0
            for j in range(ply_num[i]):
                loss += log_prob[j, plies[i][j]] * res[i]

            sum_loss += loss / ply_num[i]

        return - sum_loss / len(states)
Example #28
    def __call__(self, y, ):
        bs = y.data.shape[0]
        d = np.prod(y.data.shape[1:])

        y_normalized = F.softmax(y)
        y_log_softmax = F.log_softmax(y)
        negentropy = F.sum(y_normalized * y_log_softmax, axis=1) / d

        #zeros = to_device(np.zeros(bs).astype(np.float32), 2)
        ones = to_device(-np.ones(bs).astype(np.float32), 2)
        self.loss = F.sum(F.maximum(
            Variable(ones), 
            - negentropy)) / bs
        
        return self.loss
Example #29
    def __call__(self, x_recon, x, enc_hiddens, dec_hiddens, scale=True):
        """
        Parameters
        -----------------
        x_recon: reconstructed Variable
        x: original Variable used as the label
        enc_hiddens: list of Variable
        dec_hiddens: list of Variable
        """
        kl_recon_loss = 0
        
        # Lateral Recon Loss
        if self.rc and enc_hiddens is not None:
            for h0, h1 in zip(enc_hiddens[::-1], dec_hiddens):
                n = h0.shape[0]
                d = np.prod(h0.shape[1:])
                p = F.softmax(h0)
                log_p = F.log_softmax(h0)
                log_q = F.log_softmax(h1)
                l = F.sum(p * (log_p - log_q)) / n / d
                kl_recon_loss += l

        self.loss = kl_recon_loss
        return self.loss
Example #30
    def __call__(
        self,
        y,
    ):
        bs = y.data.shape[0]
        d = np.prod(y.data.shape[1:])

        y_normalized = F.softmax(y)
        y_log_softmax = F.log_softmax(y)
        negentropy = F.sum(y_normalized * y_log_softmax, axis=1) / d

        #zeros = to_device(np.zeros(bs).astype(np.float32), 2)
        ones = to_device(-np.ones(bs).astype(np.float32), 2)
        self.loss = F.sum(F.maximum(Variable(ones), -negentropy)) / bs

        return self.loss
Example #31
    def get_action(self, z, m):
        # assert m.shape == (1, M_DIM * Krp)
        self.m = m
        state = F.concat((z.data, self.h, m))  # Stop gradients wrt z.
        state = F.tanh(self.pi1(state))
        log_pi = F.log_softmax(
            self.pi2(state))  # log_softmax may be more stable.
        probs = F.exp(log_pi)[0]

        # avoid "ValueError: sum(pvals[:-1]) > 1.0" in numpy.multinomial
        diff = sum(probs.data[:-1]) - 1
        if diff > 0:
            probs -= (diff + np.finfo(np.float32).epsneg) / (A_DIM - 1)

        a = np.random.multinomial(1, probs.data).astype(np.float32)  # onehot
        return log_pi, a
Example #32
    def forward(self, s):
        #                                                      s: batch_size x board_x x board_y
        s = F.reshape(s, (-1, 1, self.board_x, self.board_y))  # batch_size x 1 x board_x x board_y
        s = F.relu(self.bn1(self.conv1(s)))                    # batch_size x num_channels x board_x x board_y
        s = F.relu(self.bn2(self.conv2(s)))                    # batch_size x num_channels x board_x x board_y
        s = F.relu(self.bn3(self.conv3(s)))                    # batch_size x num_channels x (board_x-2) x (board_y-2)
        s = F.relu(self.bn4(self.conv4(s)))                    # batch_size x num_channels x (board_x-4) x (board_y-4)
        s = F.reshape(s, (-1, self.args.num_channels*(self.board_x-4)*(self.board_y-4)))

        s = F.dropout(F.relu(self.fc_bn1(self.fc1(s))), ratio=self.args.dropout)  # batch_size x 1024
        s = F.dropout(F.relu(self.fc_bn2(self.fc2(s))), ratio=self.args.dropout)  # batch_size x 512

        pi = self.fc3(s)                                             # batch_size x action_size
        v = self.fc4(s)                                              # batch_size x 1

        return F.log_softmax(pi, axis=1), F.tanh(v)
Example #33
    def predict(self, xs):
        """
        batch: list of splitted sentences
        """
        batchsize = len(xs)
        xs = [self.extractor.process(x) for x in xs]
        ws, ss, ps = concat_examples(xs, padding=IGNORE)
        cat_ys, dep_ys = self.forward(ws, ss, ps)
        cat_ys = F.transpose(F.stack(cat_ys, 2), (0, 2, 1))
        dep_ys = F.transpose(F.stack(dep_ys, 2), (0, 2, 1))

        cat_ys = [F.squeeze(y, 0).data[1:len(x) + 1] for x, y in \
                zip(xs, F.split_axis(cat_ys, batchsize, 0))]

        dep_ys = [F.squeeze(F.log_softmax(y[1:len(x) + 1, :-1]), 0).data \
                for x, y in zip(xs, F.split_axis(dep_ys, batchsize, 0))]
        return cat_ys, dep_ys
Example #34
    def metric(self, model, images, labels):
        batchsize = len(images)
        embeddings = model(images)

        embeddings = F.reshape(embeddings, ((batchsize, -1)))
        shape = embeddings.shape
        metric = 0
        for embedding in embeddings:
            eculideans = F.sum(
                (embeddings - F.broadcast_to(embedding,
                                             (batchsize, shape[1])))**2,
                axis=1)
            ratios = -F.log_softmax(F.expand_dims(-eculideans, axis=0))[0]
            weights = F.softmax(F.expand_dims(-eculideans, axis=0))[0]
            metric += F.sum(ratios * weights)
        chainer.report({'metric': metric}, model)
        return metric
Example #35
    def metric(self, model, images, labels):
        xp = cupy.get_array_module(images)
        batchsize = len(images)
        embeddings = model(images)

        embeddings = F.reshape(embeddings, ((batchsize, -1)))
        shape = embeddings.shape
        metric = 0
        for embedding, label in zip(embeddings, labels):
            eculideans = F.sum(
                (embeddings - F.broadcast_to(embedding,
                                             (batchsize, shape[1])))**2,
                axis=1)
            ratios = -F.log_softmax(F.expand_dims(-eculideans, axis=0))[0]
            metric += F.sum(ratios[xp.where(labels == label)])
        chainer.report({'metric': metric}, model)
        return metric
Example #36
 def loss(self, x, target):
     xp = chainer.cuda.get_array_module(target)
     logit = F.softmax(x)
     logit = F.clip(logit, x_min=self.eps, x_max=1-self.eps)
     if self.ls == False:
         loss_ce = F.softmax_cross_entropy(x, target)
     else:
         oh_target = xp.eye(self.class_num)[target]
         ls_target = self.label_smoothing(oh_target, epsilon=0.1, xp=xp)
         loss_ce = -F.sum(F.log_softmax(x) * ls_target) / ls_target.shape[0]
     
     self.pc = self.pc*0.95 + F.mean(logit, axis=0).data*0.05
     k = self.h * self.pc + (1 - self.h)
     gamma = F.log(1 - k) / F.log(1 - self.pc) - 1
     
     loss_focal = loss_ce * self.alpha * (1 - logit) ** gamma
     return F.mean(loss_focal)
Example #37
    def beam(self, xs, ys, maxlen, beamsize, n_cands, ranking):
        with chainer.no_backprop_mode(), chainer.using_config('train', False):
            h, c, oxs = self.encoder.nstep(xs, reverse=self.reverse)

            # Initialization
            ht = [self.xp.zeros(self.units, 'f').reshape(1, self.units)] \
                  if self.feeding else None

            que = [(0.0, [BOS_ID], h, c, ht)]

            # Beam search
            for _ in range(maxlen):
                if all(map(lambda s: s[1][-1] == EOS_ID, que)):
                    break
                new_que = [] 
                for score, seq, h, c, ht in que:
                    if seq[-1] == EOS_ID:
                        new_que.append((score, seq, h, c, ht))
                    else:
                        # decode
                        w = self.xp.array([seq[-1]], self.xp.int32)
                        if self.use_attn:
                            h, c, o, ht = self.decoder.onestep(w, h, c, oxs, ht)
                        else:
                            h, c, o = self.decoder.onestep(w, h, c)
                        o = -F.log_softmax(o)
                        nbest_ids = get_argnbest(o, beamsize)[0]

                        # calculate the log likelihood
                        for index in nbest_ids:
                            new_score = score + float(o[0][index].data)
                            new_seq = seq + [index]
                            new_que.append((new_score, new_seq, h, c, ht))

                # sort new_que by length-normalized score (best hypotheses first)
                new_que.sort(key=lambda x: x[0]/(len(x[1]) - 1))
                que = new_que[:beamsize]

        # Remove EOS and BOS tags
        hyps = [que[i][1][1:-1] if que[i][1][-1] == EOS_ID 
                   else que[i][1][1:] for i in range(beamsize)]

        # ranking
        if ranking == 'sbleu':
            hyps = self.sbleu_ranking(hyps, ys)
        return hyps[:n_cands]
Example #38
def test(net, inputs, test_token_len, beam_width=10):
    xp = net.xp
    from_sentences = []
    sentences = []
    for xs in inputs:
        net.reset_state()
        for raw_x in xs.data:
            x = xp.full((beam_width,), raw_x, dtype=np.int32)
            x = chainer.Variable(x, volatile=True)
            net(x, decode=False, train=False)
        candidates = [(None, [begin_id], 0)]
        for i in six.moves.range(test_token_len):
            next_candidates = []
            current_candidates = []
            x = []
            for sub_state, tokens, likelihood in candidates:
                if tokens[-1] == end_id:
                    continue
                if sub_state != None:
                    net.set_sub_state(len(x), sub_state)
                current_candidates.append((len(x), tokens, likelihood))
                x.append(tokens[-1])
            x = chainer.Variable(xp.asarray(x, dtype=np.int32), volatile=True)
            y = F.log_softmax(net(x, decode=True, train=False))
            for j, tokens, likelihood in current_candidates:
                sub_state = net.get_sub_state(j)
                token_likelihoods = cuda.to_cpu(y.data[0])
                top_tokens = token_likelihoods.argsort()[-beam_width:]
                next_candidates.extend([(sub_state, tokens + [j], likelihood + token_likelihoods[j]) for j in top_tokens])
            candidates = sorted(next_candidates, key=lambda x: -x[2])[:beam_width]
            if all([candidate[1][-1] == end_id for candidate in candidates]):
                break
        sentences.append(candidates[0][1][1:-1])
    return sentences

    for xs in inputs:
        while len(tokens) < test_token_len:
            token_id = chainer.Variable(xp.asarray([token_id], dtype=np.int32), volatile=True)
            y = net(token_id, decode=True, train=False)
            token_id = int(xp.argmax(y.data[0]))
            if token_id == end_id:
                break
            tokens.append(token_id)
        sentences.append(tokens)
    return sentences
Example #39
    def output_and_loss(self, h_block, t_block):
        batch, units, length = h_block.shape

        # Output (all together at once for efficiency)
        concat_logit_block = seq_func(self.output,
                                      h_block,
                                      reconstruct_shape=False)
        rebatch, _ = concat_logit_block.shape
        # Make target
        concat_t_block = t_block.reshape((rebatch))
        ignore_mask = (concat_t_block >= 0)
        n_token = ignore_mask.sum()
        normalizer = n_token  # n_token or batch or 1
        # normalizer = 1

        if not self.use_label_smoothing:
            loss = F.softmax_cross_entropy(concat_logit_block, concat_t_block)
            loss = loss * n_token / normalizer
        else:
            log_prob = F.log_softmax(concat_logit_block)
            broad_ignore_mask = self.xp.broadcast_to(ignore_mask[:, None],
                                                     concat_logit_block.shape)
            pre_loss = ignore_mask * \
                log_prob[self.xp.arange(rebatch), concat_t_block]
            loss = -F.sum(pre_loss) / normalizer

        accuracy = F.accuracy(concat_logit_block,
                              concat_t_block,
                              ignore_label=-1)
        perp = self.xp.exp(loss.data * normalizer / n_token)

        # Report the Values
        reporter.report(
            {
                'loss': loss.data * normalizer / n_token,
                'acc': accuracy.data,
                'perp': perp
            }, self)

        if self.use_label_smoothing:
            label_smoothing = broad_ignore_mask * \
                - 1. / self.n_target_vocab * log_prob
            label_smoothing = F.sum(label_smoothing) / normalizer
            loss = 0.9 * loss + 0.1 * label_smoothing
        return loss
Example #40
    def recognize(self, x_block, recog_args, char_list=None, rnnlm=None):
        '''E2E beam search

        :param ndarray x: input acoustic feature (B, T, D) or (T, D)
        :param namespace recog_args: argument namespace containing options
        :param list char_list: list of characters
        :param torch.nn.Module rnnlm: language model module
        :return: N-best decoding results
        :rtype: list
        '''

        xp = self.xp
        with chainer.no_backprop_mode(), chainer.using_config('train', False):
            ilens = [x_block.shape[0]]
            batch = len(ilens)
            xs, x_mask, ilens = self.encoder(x_block[None, :, :], ilens)
            logging.info('Encoder size: ' + str(xs.shape))
            if recog_args.ctc_weight > 0.0:
                raise NotImplementedError(
                    'use joint ctc/tranformer decoding. WIP')
            if recog_args.beam_size == 1:
                logging.info('Use greedy search implementation')
                ys = xp.full((1, 1), self.sos)
                score = xp.zeros(1)
                maxlen = xs.shape[1] + 1
                for step in range(maxlen):
                    yy_mask = self.make_attention_mask(ys, ys)
                    yy_mask *= self.make_history_mask(ys)
                    xy_mask = self.make_attention_mask(ys, xp.array(x_mask))
                    out = self.decoder(ys, yy_mask, xs,
                                       xy_mask).reshape(batch, -1, self.odim)
                    prob = F.log_softmax(out[:, -1], axis=-1)
                    max_prob = prob.array.max(axis=1)
                    next_id = F.argmax(prob, axis=1).array.astype(np.int64)
                    score += max_prob
                    if step == maxlen - 1:
                        next_id[0] = self.eos
                    ys = F.concat((ys, next_id[None, :]), axis=1).data
                    if next_id[0] == self.eos:
                        break
                nbest_hyps = [{"score": score, "yseq": ys[0].tolist()}]
            else:
                raise NotImplementedError(
                    'use beam search implementation. WIP')
        return nbest_hyps
Example #41
    def log_propensity_independent(self, x, action):
        xp = cuda.get_array_module(action)
        pred = self._predict(x)

        final_action = action
        if self.k > 0 and action.shape[1] < pred.shape[1]:
            all_actions = F.broadcast_to(xp.arange(0, pred.shape[1],
                                                   dtype=action.data.dtype),
                                         pred.shape)
            inv_items = inverse_select_items_per_row(all_actions, action)
            items = select_items_per_row(all_actions, action)
            final_action = F.concat((items, inv_items), axis=1)

        pred = select_items_per_row(pred, final_action)

        results = F.log_softmax(pred)
        if self.k > 0:
            results = results[:, :self.k]
        return results
Example #42
    def sample(self, vis_feats, temperature=1, stochastic=True):
        xp = cuda.get_array_module(vis_feats)
        batch_size = vis_feats.shape[0]
        self.LSTM_initialize()

        output = xp.zeros((batch_size, self.seq_length), dtype=xp.int32)
        log_probs = []
        mask = xp.ones(batch_size)

        with chainer.using_config('train', False):
            for i in range(self.seq_length):
                if i == 0:
                    sos = self.word_emb(
                        Variable(
                            xp.ones(batch_size, dtype=xp.int32) *
                            (self.vocab_size + 1)))
                    _, h = self.LSTM(vis=vis_feats, sos=sos)
                else:
                    mask_ = xp.where(w != 0, 1, 0)
                    mask *= mask_
                    if mask.sum() == 0:
                        break
                    w = self.word_emb(Variable(w))
                    _, h = self.LSTM(vis=vis_feats, word=w)
                h = self.out(h)
                logsoft = F.log_softmax(h) * mask.reshape(
                    batch_size, 1).repeat(h.data.shape[1],
                                          axis=1)  # if input==eos then mask

                if stochastic:
                    prob_prev = F.exp(logsoft / temperature)
                    prob_prev /= F.broadcast_to(
                        F.sum(prob_prev, axis=1, keepdims=True),
                        prob_prev.shape)
                    w = softmax_sample(prob_prev)
                else:
                    w = xp.argmax(logsoft.data, axis=1)
                output[:, i] = w
                log_probs.append(logsoft[np.arange(batch_size),
                                         w].reshape(1, batch_size))
        return output, F.concat(log_probs, axis=0)
Example #43
    def Fissher(self, imageset, shape, gpu, num_samples):

        if gpu >= 0:
            xp = cp
        else:
            xp = np

        num_samples = num_samples

        self.F_accum = []
        for v in range(len(self.var_list)):
            self.F_accum.append(xp.zeros(self.var_list[v].data.shape))

        for i in range(num_samples):
            c, w, h = shape
            x = np.ndarray((1, c, w, h), dtype=np.float32)
            y = np.ndarray((1, ), dtype=np.int32)
            rnd = np.random.randint(len(imageset))
            path = imageset[rnd][0]
            label = imageset[rnd][1]
            x[0] = np.array(path)
            y[0] = np.array(label)
            if gpu >= 0:
                x = cuda.to_gpu(x)
                y = cuda.to_gpu(y)

            x = chainer.Variable(x)
            y = chainer.Variable(y)

            probs = F.log_softmax(self.predict(x, y))
            class_ind = np.argmax(cuda.to_cpu(probs.data))
            loss = probs[0, class_ind]
            self.cleargrads()
            loss.backward()
            for v in range(len(self.F_accum)):
                self.F_accum[v] += xp.square(self.var_list[v].grad)

        # divide totals by number of samples
        for v in range(len(self.F_accum)):
            self.F_accum[v] /= num_samples
        print "Fii", self.F_accum[0]
Example #44
    def softmax_cross_entropy(self, y, t):
        import numpy as np

        log_softmax = F.log_softmax(y)
        # SelectItem is not supported by onnx-chainer.
        # TODO(hamaji): Support it?
        # log_prob = F.select_item(log_softmax, t)

        # TODO(hamaji): Currently, F.sum with axis=1 cannot be
        # backpropped properly.
        # log_prob = F.sum(log_softmax * t, axis=1)
        # self.batch_size = chainer.Variable(np.array(t.size, np.float32),
        #                                    name='batch_size')
        # return -F.sum(log_prob, axis=0) / self.batch_size
        log_prob = F.sum(log_softmax * t, axis=(0, 1))
        batch_size = chainer.Variable(np.array(t.shape[0], np.float32),
                                      name='batch_size')
        self.extra_inputs = [batch_size]
        loss = -log_prob / batch_size
        loss.name = 'loss'
        return loss
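Assuming `t` holds one-hot rows, the sum above is the standard cross entropy; a short sketch (not from the source) checking it against F.softmax_cross_entropy with integer labels:

import numpy as np
import chainer.functions as F

y = np.random.randn(4, 5).astype(np.float32)
labels = np.array([0, 3, 1, 4], dtype=np.int32)
t = np.eye(5, dtype=np.float32)[labels]              # one-hot targets

loss_onehot = -F.sum(F.log_softmax(y) * t) / 4       # as in the snippet above
loss_builtin = F.softmax_cross_entropy(y, labels)    # mean over the batch
print(loss_onehot.data, loss_builtin.data)           # should agree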
Example #45
    def compute_fisher(self, dataset):
        fisher_accum_list = [
            np.zeros(var[1].shape) for var in self.variable_list
        ]

        for _ in range(self.num_samples):
            x, _ = dataset[np.random.randint(len(dataset))]
            y = self.predictor(np.array([x]))
            prob_list = F.softmax(y)[0].data
            class_index = np.random.choice(len(prob_list), p=prob_list)
            loss = F.log_softmax(y)[0, class_index]
            self.cleargrads()
            loss.backward()
            for i in range(len(self.variable_list)):
                fisher_accum_list[i] += np.square(
                    self.variable_list[i][1].grad)

        self.fisher_list = [
            F_accum / self.num_samples for F_accum in fisher_accum_list
        ]
        return self.fisher_list
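The Fisher estimates above are typically consumed by an EWC-style penalty; a hedged sketch under that assumption, where `star_values` and `lam` are illustrative names and `variable_list` is assumed to hold (name, parameter) pairs as in compute_fisher above:

import chainer.functions as F

def ewc_penalty(variable_list, fisher_list, star_values, lam=1.0):
    # lam/2 * sum_i F_i * (theta_i - theta_i*)^2, summed over all parameters
    penalty = 0
    for (name, param), fisher, star in zip(variable_list, fisher_list, star_values):
        penalty = penalty + F.sum((param - star) ** 2 * fisher)
    return lam / 2.0 * penalty

At training time this penalty would be added to the task loss, with `star_values` being copies of the parameter arrays taken right after compute_fisher was run.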
Example #46
 def forward(self, inputs, device):
     x, = inputs
     return functions.log_softmax(x, axis=self.axis),
Example #47
 def forward(self):
     x = chainer.Variable(self.x)
     return functions.log_softmax(x)
Example #48
 def f(x):
     return functions.log_softmax(x, self.axis)
Example #49
def kl_loss(xp, p_logit, q_logit):
    p = F.softmax(p_logit)
    _kl = F.sum(p * (F.log_softmax(p_logit) - F.log_softmax(q_logit)), 1)
    return F.sum(_kl) / xp.prod(xp.array(_kl.shape))
Example #50
    def __call__(self, y):
        y_normalized = F.softmax(y)
        y_log_softmax = F.log_softmax(y)
        n = y.data.shape[0]

        return - F.sum(y_normalized * y_log_softmax) / n
Example #51
 def forward(self):
     x = chainer.Variable(self.x)
     return functions.log_softmax(x, use_cudnn=self.use_cudnn)
Example #52
 def __call__(self, y):
     s = F.softmax(y)
     log_s = F.log_softmax(y)
     N = s.data.shape[0]
     # s * log_s sums to minus the entropy, so minimizing this maximizes entropy
     return F.sum(s*log_s) / N  # over batch
Example #53
 def log_probs(self):
     return F.log_softmax(self.logits)
Example #54
	def __call__(self, x):
		return F.log_softmax(x, self.use_cudnn)
Example #55
 def entropy(self):
     logli = F.log_softmax(self.logits)
     return F.sum(-logli * F.exp(logli), axis=-1)