Code example #1
    def aux_gamma_beta(self, i, gamma, j, beta):
        # logging.info("(%d,%d): g %.4f; b %.4f", i, j, gamma, beta)
        stv_kls = np.zeros(self.n_tests)
        sntv_liars_kls = np.zeros(self.n_tests)
        sntv_kls = np.zeros(self.n_tests)
        for test_nr in range(self.n_tests):
            true_preferences = PreferenceCreator(self.n, self.m, self.political_spectrum).create_preferences()
            poll_results = get_poll(true_preferences, gamma)
            man_ballot = manipulate(true_preferences, np.nonzero(poll_results < self.electoral_threshold)[0], beta)
            first_distribution = get_first_choice_dist(true_preferences)

            stv_scores, *_ = voting.STV_scores(true_preferences, self.electoral_threshold, percentage=True)
            stv_outcome = apportion.largest_remainder(stv_scores, self.seats)
            stv_kls[test_nr] = utils.kl_divergence(stv_outcome / self.seats, first_distribution)

            # SNTV outcome truthful
            sntv_scores, *_ = voting.SNTV_scores(true_preferences, self.electoral_threshold, percentage=True)
            sntv_outcome = apportion.largest_remainder(sntv_scores, self.seats)
            sntv_kls[test_nr] = utils.kl_divergence(sntv_outcome / self.seats, first_distribution)
            del sntv_scores, sntv_outcome

            # SNTV outcome liars
            sntv_scores, *_ = voting.SNTV_scores(man_ballot, self.electoral_threshold, percentage=True)
            sntv_outcome = apportion.largest_remainder(sntv_scores, self.seats)
            sntv_liars_kls[test_nr] = utils.kl_divergence(sntv_outcome / self.seats, first_distribution)
        logging.info("(%d,%d): g %.4f; b %.4f; kl-sntv_liars: %.4f", i, j, gamma, beta, sntv_liars_kls.mean())
        return i, j, (stv_kls.mean(), sntv_kls.mean(), sntv_liars_kls.mean())
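
These election snippets score proportionality as utils.kl_divergence(outcome / seats, first_distribution), i.e. a KL divergence between two discrete distributions over parties. The listing does not include the helper itself; the following is only a minimal sketch of such a discrete KL (the eps clipping is a hypothetical choice, not taken from the project):

import numpy as np

def kl_divergence(p, q, eps=1e-12):
    # Discrete KL divergence D(p || q) between two 1-D probability vectors.
    # eps clipping keeps the log finite when a party received no seats or votes.
    p = np.clip(np.asarray(p, dtype=float), eps, None)
    q = np.clip(np.asarray(q, dtype=float), eps, None)
    p, q = p / p.sum(), q / q.sum()
    return float(np.sum(p * np.log(p / q)))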
Code example #2
def lalala(electoral_threshold, m, n, n_tests, political_spectrum, seats, beta, poll_covid):
    stv_propor = []
    sntv_propor = []
    for test_nr in range(n_tests):
        true_preferences = PreferenceCreator(n, m, political_spectrum).create_preferences()
        poll_results = get_poll(true_preferences, poll_covid)
        man_ballot = manipulate(true_preferences, np.nonzero(poll_results < electoral_threshold)[0], beta)

        # STV outcome
        # TODO reestablish the natural order
        stv_scores, *_ = voting.STV_scores(true_preferences, electoral_threshold, percentage=True)
        stv_outcome = apportion.largest_remainder(stv_scores, seats)

        # SNTV outcome

        sntv_scores, *_ = voting.SNTV_scores(man_ballot, electoral_threshold, percentage=True)
        sntv_outcome = apportion.largest_remainder(sntv_scores, seats)

        first_distribution = get_first_choice_dist(true_preferences)

        # Proportionality
        stv_propor.append(utils.kl_divergence(stv_outcome / seats, first_distribution))
        sntv_propor.append(utils.kl_divergence(sntv_outcome / seats, first_distribution))

    return np.array(stv_propor).mean(), np.array(sntv_propor).mean()
Code example #3
def test_gamma_beta(electoral_threshold, m, n, n_tests, political_spectrum, seats):
    steps = 3
    gammass = np.geomspace(0.0001, 1, num=steps)
    betass = np.linspace(0, 1, num=steps)
    results = {'stv': np.zeros((steps, steps)), 'sntv': np.zeros((steps, steps))}

    for i, gamma in enumerate(gammass):
        for j, beta in enumerate(betass):
            logging.info("g %.4f; b %.4f", gamma, beta)
            stv_kls = np.zeros(n_tests)
            sntv_kls = np.zeros(n_tests)
            for test_nr in range(n_tests):
                true_preferences = PreferenceCreator(n, m, political_spectrum).create_preferences()
                poll_results = get_poll(true_preferences, gamma)
                man_ballot = manipulate(true_preferences, np.nonzero(poll_results < electoral_threshold)[0], beta)
                first_distribution = get_first_choice_dist(true_preferences)

                stv_scores, *_ = voting.STV_scores(true_preferences, electoral_threshold, percentage=True)
                stv_outcome = apportion.largest_remainder(stv_scores, seats)
                stv_kls[test_nr] = utils.kl_divergence(stv_outcome / seats, first_distribution)

                # SNTV outcome
                sntv_scores, *_ = voting.SNTV_scores(man_ballot, electoral_threshold, percentage=True)
                sntv_outcome = apportion.largest_remainder(sntv_scores, seats)
                sntv_kls[test_nr] = utils.kl_divergence(sntv_outcome / seats, first_distribution)

            results['stv'][i, j] = stv_kls.mean()
            results['sntv'][i, j] = sntv_kls.mean()

    return np.meshgrid(gammass, betass), results
Code example #4
def get_metrics(a, b, metric_type, operator_func):
    if metric_type == "kl_divergence":
        s1 = np.array([kl_divergence(a, b)])
        s2 = np.array([kl_divergence(b, a)])

    elif metric_type == "js_divergence":
        s1 = np.array([js_divergence(a, b)])
        s2 = np.array([js_divergence(b, a)])

    elif metric_type == "entropy":
        s1 = np.array([entropy(a)])
        s2 = np.array([entropy(b)])
    else:
        raise ValueError("metric_type undefined")

    return operator_func(operator, s1, s2)
Code example #5
def elbo(model, x, K=1):
    """Computes E_{p(x)}[ELBO] """
    qz_x, px_z, _ = model(x)
    lpx_z = px_z.log_prob(x).view(*px_z.batch_shape[:2],
                                  -1) * model.llik_scaling
    kld = kl_divergence(qz_x, model.pz(*model.pz_params))
    return (lpx_z.sum(-1) - kld.sum(-1)).mean(0).sum()
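
In the VAE snippets, kl_divergence is applied to distribution objects rather than arrays, which is consistent with torch.distributions.kl_divergence: it returns an analytic KL per latent dimension that the objectives then sum over. A self-contained toy call, with shapes chosen purely for illustration:

import torch
from torch.distributions import Normal, kl_divergence

# Toy posterior q(z|x) and prior p(z): batch of 4 samples, 8 latent dimensions.
qz_x = Normal(loc=torch.randn(4, 8), scale=0.5 * torch.ones(4, 8))
pz = Normal(loc=torch.zeros(8), scale=torch.ones(8))

kld = kl_divergence(qz_x, pz)  # shape (4, 8): KL per sample and per dimension
print(kld.sum(-1))             # per-sample KL, as in kld.sum(-1) above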
Code example #6
    def times_objective(self, mf, K=1, beta=1., alpha=1.):
        """
            Optimizer

            Args:
                model: our model
                mf: Multifactors
                rets: realized returns
        """
        qz_x, px_z, style_score = self.model(mf, K=K)

        # Reconstruction
        lpx_z = px_z.log_prob(mf).sum(-1)
        pz = self.model.pz(*self.model.pz_params)

        # Overlap
        kld = kl_divergence(qz_x, pz, samples=style_score).sum(-1)

        # Sparsity
        reg = (self.regs(pz.sample(torch.Size([mf.size(0)])).\
                view(-1, style_score.size(-1)), style_score) \
                         if self.regs.samples else self.regs(pz, qz_x))

        obj = lpx_z - (beta * kld) - (alpha * reg)

        recon = lpx_z.sum()
        overlap = kld.sum()
        sparsity = reg.sum()

        return obj.sum(), recon, overlap, sparsity
Code example #7
def get_metrics(a, b, metric_type, operator_func):
    if metric_type == "kl_divergence":
        s1 = np.array([kl_divergence(a, b)])
        s2 = np.array([kl_divergence(b, a)])

    elif metric_type == "js_divergence":
        s1 = np.array([js_divergence(a, b)])
        s2 = np.array([js_divergence(b, a)])

    elif metric_type == "entropy":
        s1 = np.array([entropy(a)])
        s2 = np.array([entropy(b)])
    else:
        raise ValueError("metric_type undefined")

    return operator_func(operator, s1, s2)
Code example #8
    def cost(self, Xi, kl_reg):
        f_KM, a_KM = self.f_KM, self.a_KM

        Xi_f = Xi[:self.lib_f.shape[1]]
        Xi_s = Xi[self.lib_f.shape[1]:]

        f_vals = self.lib_f @ Xi_f

        s_vals = self.lib_s @ Xi_s
        a_vals = 0.5 * s_vals**2
        f_vals += a_vals / self.centers1d  # Diffusion-induced drift from polar change of variables

        # Solve adjoint Fokker-Planck equation
        self.afp.precompute_operator(f_vals, a_vals)
        f_tau, a_tau = self.afp.solve(self.tau, d=0)

        mask = np.nonzero(np.isfinite(f_KM))[0]
        V = np.sum(self.W[0, mask]*abs(f_tau[mask] - f_KM[mask])**2) \
          + np.sum(self.W[1, mask]*abs(a_tau[mask] - a_KM[mask])**2)

        if kl_reg > 0:
            p_est = self.fp.solve(f_vals, a_vals)

            kl = utils.kl_divergence(self.p_hist,
                                     p_est,
                                     dx=self.fp.dx,
                                     tol=1e-6)
            kl = max(0, kl)
            V += kl_reg * kl

        return V
Code example #9
def test_alpha(electoral_threshold, m, n, n_tests, political_spectrum, seats, poll_covid):
    props = []
    # progressbar = tqdm(total=50)
    alphass = np.linspace(0.01, 1, num=10)
    for i, alpha in enumerate(alphass):
        print(i)
        # progressbar.update()
        astv_propor = []
        astv_l_propor = []
        for test_nr in range(n_tests):
            true_preferences = PreferenceCreator(n, m, political_spectrum).create_preferences()
            poll_results = get_poll(true_preferences, poll_covid)
            man_ballot = manipulate(true_preferences, np.nonzero(poll_results < electoral_threshold)[0], 1.)
            first_distribution = get_first_choice_dist(true_preferences)

            # a-STV outcome
            astv_scores, *_ = voting.alpha_STV_scores(true_preferences, alpha, electoral_threshold, percentage=True)
            astv_outcome = apportion.largest_remainder(astv_scores, seats)

            # a-STV liars outcome
            astv_l_scores, *_ = voting.alpha_STV_scores(man_ballot, alpha, electoral_threshold, percentage=True)
            astv_l_outcome = apportion.largest_remainder(astv_l_scores, seats)

            astv_propor.append(utils.kl_divergence(astv_outcome / seats, first_distribution))
            astv_l_propor.append(utils.kl_divergence(astv_l_outcome / seats, first_distribution))

        a = np.array(astv_propor).mean()
        al = np.array(astv_l_propor).mean()

        # a = lalala(electoral_threshold, m, n, n_tests, political_spectrum, seats, beta, 0.01)
        props.append((a, al))
    # progressbar.close()
    mean_astv_prop, mean_astv_l_prop = list(zip(*props))
    a = np.array(mean_astv_prop)
    b = np.array(mean_astv_l_prop)
    # print(a, b)
    plt.plot(alphass, a, label='a-stv')
    plt.plot(alphass, b, label='a-stv-liars')
    plt.legend()
    plt.show()
Code example #10
    def train(self, epoch):
        self.optimizer.lr = self.lr_schedule(epoch)
        train_loss = 0
        train_acc = 0
        for i, batch in enumerate(self.train_iter):
            x_array, t_array = chainer.dataset.concat_examples(batch)
            x = chainer.Variable(cuda.to_gpu(x_array))
            t = chainer.Variable(cuda.to_gpu(t_array))
            y = self.model(x)
            self.model.cleargrads()

            if self.opt.BC:
                loss = utils.kl_divergence(y, t)
                acc = F.accuracy(y, F.argmax(t, axis=1))
            else:
                loss = F.softmax_cross_entropy(y, t)
                acc = F.accuracy(y, t)

            # self.optimizer.check_nan_in_grads()
            self.optimizer.use_cleargrads(use=True)

            loss.backward()
            self.optimizer.update()
            train_loss += float(loss.data) * len(t.data)
            train_acc += float(acc.data) * len(t.data)

            elapsed_time = time.time() - self.start_time
            progress = (self.n_batches * (epoch - 1) + i +
                        1) * 1.0 / (self.n_batches * self.opt.nEpochs)
            eta = elapsed_time / progress - elapsed_time

            line = '* Epoch: {}/{} ({}/{}) | Train: LR {} | Time: {} (ETA: {})'.format(
                epoch, self.opt.nEpochs, i + 1,
                self.n_batches, self.optimizer.lr, utils.to_hms(elapsed_time),
                utils.to_hms(eta))
            sys.stderr.write('\r\033[K' + line)
            sys.stderr.flush()

        self.train_iter.reset()
        train_loss /= len(self.train_iter.dataset)
        train_top1 = 100 * (1 - train_acc / len(self.train_iter.dataset))

        return train_loss, train_top1
Code example #11
def m_elbo_naive(model, x, K=1):
    """Computes E_{p(x)}[ELBO] for multi-modal vae --- NOT EXPOSED"""
    qz_xs, px_zs, zss = model(x)
    lpx_zs, klds = [], []
    for r, qz_x in enumerate(qz_xs):
        kld = kl_divergence(qz_x, model.pz(*model.pz_params))
        klds.append(kld.sum(-1))
        for d, px_z in enumerate(px_zs[r]):
            lpx_z = px_z.log_prob(x[d]) * model.vaes[d].llik_scaling
            lpx_zs.append(lpx_z.view(*px_z.batch_shape[:2], -1).sum(-1))
    obj = (1 / len(model.vaes)) * (torch.stack(lpx_zs).sum(0) -
                                   torch.stack(klds).sum(0))
    return obj.mean(0).sum()
Code example #12
    def analyse(self, data, K):
        self.eval()
        with torch.no_grad():
            qz_xs, _, zss = self.forward(data, K=K)
            pz = self.pz(*self.pz_params)
            zss = [
                pz.sample(torch.Size([K, data[0].size(0)])).view(-1, pz.batch_shape[-1]),
                *[zs.view(-1, zs.size(-1)) for zs in zss]
            ]
            zsl = [
                torch.zeros(zs.size(0)).fill_(i) for i, zs in enumerate(zss)
            ]
            kls_df = tensors_to_df(
                [
                    *[kl_divergence(qz_x, pz).cpu().numpy() for qz_x in qz_xs],
                    *[0.5 * (kl_divergence(p, q) + kl_divergence(q, p)).cpu().numpy()
                      for p, q in combinations(qz_xs, 2)]
                ],
                head='KL',
                keys=[
                    *[r'KL$(q(z|x_{})\,||\,p(z))$'.format(i) for i in range(len(qz_xs))],
                    *[r'J$(q(z|x_{})\,||\,q(z|x_{}))$'.format(i, j)
                      for i, j in combinations(range(len(qz_xs)), 2)]
                ],
                ax_names=['Dimensions', r'KL$(q\,||\,p)$'])
        return embed_umap(torch.cat(zss, 0).cpu().numpy()), \
            torch.cat(zsl, 0).cpu().numpy(), \
            kls_df
Code example #13
    def train(self, epoch):
        """
            run one train epoch
        """
         
        device = torch.device("cuda" if cuda.is_available() else "cpu")
        train_loss = 0
        train_acc = 0
        for i, (x_array, t_array) in enumerate(self.train_iter):
            self.optimizer.zero_grad()

            x = x_array.to(device)
            t = t_array.to(device)
            y = self.model(x)
            if self.opt.BC:
                t = t.to(device, dtype=torch.float32)
                y = y.to(device, dtype=torch.float32)
                loss = utils.kl_divergence(y, t)
                t_indices = torch.argmax(t, dim=1)
                acc = accuracy(y.data, t_indices)
            else:
                """ F.cross_entropy already combines log_softmax and NLLLoss """
                t = t.to(device, dtype=torch.int64)
                loss = F.cross_entropy(y, t)
                acc = accuracy(y.data, t)

            
            loss.backward()
            self.optimizer.step()

            train_loss += float(loss.item()) * len(t.data)
            train_acc += float(acc.item()) * len(t.data)

            elapsed_time = time.time() - self.start_time
            progress = (self.n_batches * (epoch - 1) + i + 1) * 1.0 / (self.n_batches * self.opt.nEpochs)
            eta = elapsed_time / progress - elapsed_time

            line = '* Epoch: {}/{} ({}/{}) | Train: LR {} | Time: {} (ETA: {})'.format(
                epoch, self.opt.nEpochs, i + 1, self.n_batches,
                self.scheduler.get_last_lr(), utils.to_hms(elapsed_time), utils.to_hms(eta))
            sys.stderr.write('\r\033[K' + line)
            sys.stderr.flush()

        train_loss /= len(self.train_iter.dataset)
        train_top1 = 100 * (1 - train_acc / len(self.train_iter.dataset))

        return train_loss, train_top1
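
In the opt.BC branch of the training loops above, utils.kl_divergence(y, t) is used as the training loss between the network output y and a soft target t. The helper is not part of this listing; a minimal PyTorch sketch under the assumption that y holds raw logits and t holds per-row target distributions:

import torch
import torch.nn.functional as F

def kl_divergence(y, t, eps=1e-7):
    # Hypothetical stand-in for the BC loss: KL(t || softmax(y)), averaged over the batch.
    log_q = F.log_softmax(y, dim=1)  # predicted log-probabilities
    t = torch.clamp(t, min=eps)      # avoid log(0) in the target entropy term
    return (t * (torch.log(t) - log_q)).sum(dim=1).mean()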
Code example #14
File: objectives.py Project: dichotomies/decomp-vae
def decomp_objective(model,
                     x,
                     K=1,
                     beta=1.0,
                     alpha=0.0,
                     regs=None,
                     components=False):
    """Computes E_{p(x)}[ELBO_{\alpha,\beta}] """
    qz_x, px_z, zs = model(x, K)
    lpx_z = px_z.log_prob(x).view(*px_z.batch_shape[:2], -1).sum(-1)
    pz = model.pz(*model.pz_params)
    kld = kl_divergence(qz_x, pz, samples=zs).sum(-1)
    reg = (regs(pz.sample(torch.Size([x.size(0)])).view(-1, zs.size(-1)), zs.squeeze(0)) if regs.samples else regs(pz, qz_x)) \
        if regs else torch.tensor(0)
    obj = lpx_z - (beta * kld) - (alpha * reg)
    return obj.sum() if not components else (obj.sum(), lpx_z.sum(), kld.sum(),
                                             reg.sum())
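
The decomp-vae style objectives pass samples=zs into kl_divergence, which suggests a sampled estimate E_q[log q(z) - log p(z)] is used when no closed form is available. That is only a reading of the call site, not the project's implementation; a minimal Monte-Carlo sketch:

import torch
from torch.distributions import Normal

def kl_divergence_mc(qz_x, pz, samples):
    # Monte-Carlo KL estimate: log q(z|x) - log p(z) evaluated at samples drawn from q.
    return qz_x.log_prob(samples) - pz.log_prob(samples)

qz_x = Normal(torch.zeros(4, 8), 0.5 * torch.ones(4, 8))
pz = Normal(torch.zeros(8), torch.ones(8))
zs = qz_x.rsample(torch.Size([16]))                    # K=16 samples per batch element
print(kl_divergence_mc(qz_x, pz, zs).sum(-1).mean(0))  # per-element KL estimate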
Code example #15
    def cost(self, Xi, kl_reg):
        r"""Least-squares cost function for optimization"""
        f_KM, a_KM = self.f_KM[0].flatten(), self.a_KM[0].flatten()

        Xi_f = Xi[:self.lib_f.shape[1]]
        Xi_s = Xi[self.lib_f.shape[1]:]

        f_vals = self.lib_f @ Xi_f
        s_vals = self.lib_s @ Xi_s
        a_vals = 0.5 * (np.real(s_vals)**2 + 1j * (np.imag(s_vals))**2)

        # Solve adjoint Fokker-Planck equation
        self.afp.precompute_operator(
            [np.real(f_vals), np.imag(f_vals)],
            [np.real(a_vals), np.imag(a_vals)])
        f_tau, a_tau = self.afp.solve(self.tau,
                                      d=0)  # Assumes real/imag symmetry

        mask = np.nonzero(np.isfinite(f_KM))[0]
        V = np.sum(self.W[0, mask]*abs(f_tau[mask] - f_KM[mask])**2) \
          + np.sum(self.W[1, mask]*abs(a_tau[mask] - a_KM[mask])**2)

        if kl_reg > 0:
            p_est = self.fp.solve([
                np.reshape(np.real(f_vals), self.fp.N),
                np.reshape(np.imag(f_vals), self.fp.N)
            ], [
                np.reshape(np.real(a_vals), self.fp.N),
                np.reshape(np.imag(a_vals), self.fp.N)
            ])

            kl = utils.kl_divergence(self.p_hist,
                                     p_est,
                                     dx=self.fp.dx,
                                     tol=1e-6)
            kl = max(0, kl)
            V += kl_reg * kl

        if not np.isfinite(V):
            print('Error in cost function')
            print(Xi)
            print(f_vals)
            print(a_vals)
            return None

        return V
Code example #16
def rank_phrase(case_file):
    ph_dist_map = {}
    smoothing_factor = 0.0
    phrase_map, cell_map, cell_cnt = read_caseolap_result(case_file)
    unif = [1.0 / cell_cnt] * cell_cnt
    
    for ph in phrase_map:
        ph_vec = [x[1] for x in phrase_map[ph].items()]   # Modified by MILI
        if len(ph_vec) < cell_cnt:
            ph_vec += [0] * (cell_cnt - len(ph_vec))
        # smoothing
        ph_vec = [x + smoothing_factor for x in ph_vec]
        ph_vec = utils.l1_normalize(ph_vec)
        ph_dist_map[ph] = utils.kl_divergence(ph_vec, unif)
        
    ranked_list = sorted(ph_dist_map.items(), key=operator.itemgetter(1), reverse=True)
    
    return ranked_list
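
rank_phrase scores each phrase by the KL divergence of its L1-normalized per-cell distribution from the uniform distribution, so phrases concentrated in a few cells rank highest. A small self-contained illustration of that scoring, with hypothetical stand-ins for utils.l1_normalize and utils.kl_divergence:

import numpy as np

def l1_normalize(vec):
    # Rescale a non-negative vector so its entries sum to 1.
    total = float(sum(vec))
    return [x / total for x in vec]

def kl_divergence(p, q):
    # Discrete KL D(p || q); zero entries of p contribute nothing to the sum.
    p, q = np.asarray(p, float), np.asarray(q, float)
    mask = p > 0
    return float(np.sum(p[mask] * np.log(p[mask] / q[mask])))

unif = [1.0 / 4] * 4
print(kl_divergence(l1_normalize([9, 1, 0, 0]), unif))  # cell-specific phrase: large score
print(kl_divergence(l1_normalize([3, 2, 3, 2]), unif))  # evenly spread phrase: near zero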
Code example #17
File: vae.py Project: lzj1769/mmvae
    def analyse(self, data, K):
        self.eval()
        with torch.no_grad():
            qz_x, _, zs = self.forward(data, K=K)
            pz = self.pz(*self.pz_params)
            zss = [
                pz.sample(torch.Size([K, data.size(0)])).view(-1, pz.batch_shape[-1]),
                zs.view(-1, zs.size(-1))
            ]
            zsl = [
                torch.zeros(zs.size(0)).fill_(i) for i, zs in enumerate(zss)
            ]
            kls_df = tensors_to_df([kl_divergence(qz_x, pz).cpu().numpy()],
                                   head='KL',
                                   keys=[r'KL$(q(z|x)\,||\,p(z))$'],
                                   ax_names=['Dimensions', r'KL$(q\,||\,p)$'])
        return embed_umap(torch.cat(zss, 0).cpu().numpy()), \
            torch.cat(zsl, 0).cpu().numpy(), \
            kls_df
Code example #18
File: train.py Project: midas-research/speechmix
    def train(self, epoch):
        self.optimizer.lr = self.lr_schedule(epoch)
        train_loss = 0
        train_acc = 0
        for i, batch in enumerate(self.train_iter):
            x_array, t_array = chainer.dataset.concat_examples(batch)
            x_array = np.reshape(x_array,(self.opt.batchSize*2,-1)).astype('float32')
            t_array = np.reshape(t_array,(self.opt.batchSize*2,-1)).astype('float32')
            x = chainer.Variable(cuda.to_gpu(x_array[:, None, None, :]))
            t = chainer.Variable(cuda.to_gpu(t_array))
            self.model.cleargrads()
            y, t = self.model(x, t, self.opt.mixup_type, self.opt.eligible, self.opt.batchSize)
            if self.opt.BC:
                loss = utils.kl_divergence(y, t)
                acc = F.accuracy(y, F.argmax(t, axis=1))
            else:
                loss = F.softmax_cross_entropy(y, t)
                acc = F.accuracy(y, t)

            loss.backward()
            self.optimizer.update()
            train_loss += float(loss.data) * len(t.data)
            train_acc += float(acc.data) * len(t.data)

            elapsed_time = time.time() - self.start_time
            progress = (self.n_batches * (epoch - 1) + i + 1) * 1.0 / (self.n_batches * self.opt.nEpochs)
            if progress != 0:
                eta = elapsed_time / progress - elapsed_time
            else:
                eta = 0
            line = '* Epoch: {}/{} ({}/{}) | Train: LR {} | Time: {} (ETA: {})'.format(
                epoch, self.opt.nEpochs, i + 1, self.n_batches,
                self.optimizer.lr, utils.to_hms(elapsed_time), utils.to_hms(eta))
            sys.stderr.write('\r\033[K' + line)
            sys.stderr.flush()

        self.train_iter.reset()
        train_loss /= len(self.train_iter.dataset)*2
        train_top1 = 100 * (1 - train_acc / (len(self.train_iter.dataset)*2))

        return train_loss, train_top1
Code example #19
def m_elbo(model, x, K=1):
    """Computes importance-sampled m_elbo (in notes3) for multi-modal vae """
    qz_xs, px_zs, zss = model(x)
    lpx_zs, klds = [], []
    for r, qz_x in enumerate(qz_xs):
        kld = kl_divergence(qz_x, model.pz(*model.pz_params))
        klds.append(kld.sum(-1))
        for d in range(len(px_zs)):
            lpx_z = px_zs[d][d].log_prob(x[d]).view(
                *px_zs[d][d].batch_shape[:2], -1)
            lpx_z = (lpx_z * model.vaes[d].llik_scaling).sum(-1)
            if d == r:
                lwt = torch.tensor(0.0)
            else:
                zs = zss[d].detach()
                lwt = (qz_x.log_prob(zs) -
                       qz_xs[d].log_prob(zs).detach()).sum(-1)
            lpx_zs.append(lwt.exp() * lpx_z)
    obj = (1 / len(model.vaes)) * (torch.stack(lpx_zs).sum(0) -
                                   torch.stack(klds).sum(0))
    return obj.mean(0).sum()
Code example #20
def reweight(embs, dp_file, lp_file):
	source_type = 'd'
	target_type = 'l'
	mid_type = 'p'

	ori_embs = embs
	agg_embs = copy.copy(embs)

	# Step 0: check original embedding's performance
	print '*********** Direct Embedding'
	evaluate(ori_embs, true_file, target_dim)

	pd_map = load_dp(dp_file, reverse=True)
	dp_map = load_edge_map(dp_file)
	lp_map = load_edge_map(lp_file)
	dist_map = {x:1 for x in embs[mid_type]}
	vec_size = 0
	for d in ori_embs[mid_type]:
		vec_size = len(ori_embs[mid_type][d])
		break

	# print '============= dp, pd maps loaded'


	# Step 1: check with D weighted avg, what's the performance
	agg_embs[source_type] = weighted_avg_embedding(dp_map, agg_embs[mid_type], dist_map, vec_size)

	# optional L - embedding also aggregated from P
	normal = False

	if not normal:
		agg_embs[target_type] = weighted_avg_embedding(lp_map, agg_embs[mid_type], dist_map, vec_size)


	# print '============= doc embedding aggregated.'

	print '*********** Aggregate iter 0'
	evaluate(agg_embs, true_file, target_dim)

	

	for i in range(2):

		if i > 0:
			normal = True

		print '============= iter ' + str(i+1) + ' of dist started.'

		pred_label, doc_score = doc_assignment(agg_embs, source_type, target_type)
		top_labels = [w.path for w in hier.get_nodes_at_level(1)]

		# print '============= docs assigned to labels'

		# # print meta stats
		# top_label_cnts = {}
		# for label in top_labels:
		# 	top_label_cnts[label] = 0
		# for doc_pair in filtered_docs:
		# 	l = pred_label[doc_pair[0]]
		# 	top_label_cnts[l] += 1
		# print top_label_cnts
		# print 'top level labels: ' + str(top_labels)

		label_to_idx = {}
		for idx, label in enumerate(top_labels):
			label_to_idx[label] = idx
		uniform_vec = [1.0/len(top_labels)] * len(top_labels)
		# print uniform_vec
		label_to_doc = {}
		
		for label in top_labels:
			label_to_doc[label] = set()


		docs_used = {}

		if normal:
			print 'used docs in reweighting: ' + str(len(pred_label))
			for doc, score in doc_score.iteritems():
				label_to_doc[pred_label[doc]].add(doc)
		else:
			for label in top_labels:
				p = label.lower()
				# idx = label_to_idx[label]
				for doc in pd_map[p]:
					label_to_doc[label].add(doc)
					if doc not in docs_used:
						docs_used[doc] = set()
					docs_used[doc].add(label)
			print 'docs used: %d' % len(docs_used)




		cnt_vec = [0.0] * len(top_labels)
		for label in label_to_doc:
			cnt_vec[label_to_idx[label]] = len(label_to_doc[label])
		comp_vec = utils.l1_normalize(cnt_vec)

		print cnt_vec

		# print comp_vec

		distinct_map = {}
		
		if normal:
			for phrase in embs[mid_type]:
				p_vec = [0.0] * len(top_labels)

				# if len(pd_map[phrase]) < 100:
				# 	continue

				for doc in pd_map[phrase]:
					idx = label_to_idx[pred_label[doc]]
					p_vec[idx] += 1.0
				
				if sum(p_vec) == 0:
					print 'ERROR!!!!!!!!!!'
					continue

				p_vec = utils.l1_normalize(p_vec)

				# kl = 0.1 + 0.9 * utils.kl_divergence(p_vec, uniform_vec)
				kl = utils.kl_divergence(p_vec, uniform_vec)
				# kl = utils.kl_divergence(p_vec, comp_vec)
				distinct_map[phrase] = kl
		else:
			for phrase in embs[mid_type]:
				p_vec = [0.0] * len(top_labels)

				# if len(pd_map[phrase]) < 100:
				# 	continue

				for doc in pd_map[phrase]:
					if doc in docs_used:
						for label in docs_used[doc]:
							idx = label_to_idx[label]
							p_vec[idx] += 1.0

				# print p_vec
				
				if sum(p_vec) == 0:
					distinct_map[phrase] = 0
					# print 'ERROR!!!!!!!!!!'
					continue
				
				# p_vec = [x / cnt_vec[i] for i, x in enumerate(p_vec)]


				p_vec = utils.l1_normalize(p_vec)

				# kl = 0.1 + 0.9 * utils.kl_divergence(p_vec, uniform_vec)
				# kl = utils.kl_divergence(p_vec, uniform_vec)
				kl = utils.kl_divergence(p_vec, comp_vec)
				distinct_map[phrase] = kl

		dist_map = distinct_map
		# with open('focal_comp.txt', 'w+') as g:
		# 	for (ph, score) in sorted(dist_map.items(), key=operator.itemgetter(1), reverse=True):
		# 		g.write('%s,%f\t' % (ph, score))

		# print '============= phrase distinctness computed.'

		agg_embs[source_type] = weighted_avg_embedding(dp_map, agg_embs[mid_type], dist_map, vec_size)
		# print '============= doc embedding aggregated.'

		print '*********** Aggregate with distinct at iter ' + str(i + 1)
		evaluate(agg_embs, true_file, target_dim)
Code example #21
def expan_round(embs, seeds_map, all_seeds, limit, cate_lim, mode='EMB', pd_map=None):

	target_type = 'p'

	multiplier = 5
	thre_softmax = 0.5

	extended_seeds = set()
	candidates = {}

	if mode == 'EMB':
		for phrase in embs[target_type]:
			if phrase in all_seeds:
				continue
			t_emb = embs[target_type][phrase]
			rel_values = {}
			# flat comparison
			for label in seeds_map:
				max_sim = 0
				for seed in seeds_map[label]:
					sim = multiplier * utils.cossim(t_emb, embs[target_type][seed])
					if sim > max_sim:
						max_sim = sim
				rel_values[label] = max_sim

			utils.softmax_for_map(rel_values)
			best_label = sorted(rel_values.items(), key=operator.itemgetter(1), reverse=True)[0][0]
			candidates[best_label + '@' + phrase] = rel_values[best_label]
	
	elif mode == 'DIS':
		pred_label, doc_score = doc_assignment(embs, 'd', 'l', mode='FLAT')
		top_labels = [w.path for w in hier.get_all_nodes()]
		print 'Doc Assignment done...'

		label_to_idx = {}
		for idx, label in enumerate(top_labels):
			label_to_idx[label] = idx
		# print uniform_vec
		label_to_doc = {}
		
		for label in top_labels:
			label_to_doc[label] = set()
		for doc, score in doc_score.iteritems():
			label_to_doc[pred_label[doc]].add(doc)
		cnt_vec = [0.0] * len(top_labels)
		for label in label_to_doc:
			cnt_vec[label_to_idx[label]] = len(label_to_doc[label])
		comp_vec = utils.l1_normalize(cnt_vec)

		uniform_vec = [1.0/len(top_labels)] * len(top_labels)
		# print cnt_vec
		# print comp_vec

		for phrase in embs['p']:
			if phrase in all_seeds:
				continue

			p_vec = [0.0] * len(top_labels)

			for doc in pd_map[phrase]:
				idx = label_to_idx[pred_label[doc]]
				p_vec[idx] += 1.0

			max_label_value = 0
			best_label = ''
			best_cnt = 0
			for label in top_labels:
				idx = label_to_idx[label]
				if p_vec[idx] > 0:
					norm_value = p_vec[idx] / cnt_vec[idx]
					if norm_value > max_label_value:
						max_label_value = norm_value
						best_label = label
						best_cnt = p_vec[idx]

			if sum(p_vec) == 0:
				print 'ERROR!!!!!!!!!!'
				continue
			p_vec = utils.l1_normalize(p_vec)
			# kl = 0.1 + 0.9 * utils.kl_divergence(p_vec, uniform_vec)
			# kl = utils.kl_divergence(p_vec, comp_vec)
			kl = utils.kl_divergence(p_vec, uniform_vec)

			# best_label = sorted(rel_values.items(), key=operator.itemgetter(1), reverse=True)[0][0]
			pop = max_label_value
			# * (1 + math.log(1 + max_label_value))
			candidates[best_label + '@' + phrase] = kl * max_label_value

	candidates = sorted(candidates.items(), key=operator.itemgetter(1), reverse=True)

	# cands_by_label = {}
	# for cand in candidates:
	# 	label, phrase = cand.split('@')
	# 	if label not in cands_by_label:
	# 		cands_by_label[label] = {}
	# 	cands_by_label[label][phrase] = candidates[cand]

	# for label in cands_by_label:
	# 	print '\n' + label
	# 	cand_cate = cands_by_label[label]
	# 	best_exps = sorted(cand_cate.items(), key=operator.itemgetter(1), reverse=True)[:10]
	# # best_exps = sorted(candidates.items(), key=operator.itemgetter(1), reverse=True)[:30]
	# 	print best_exps

	# exit(1)

	added = 0
	added_cates = {}
	for (cand, score) in candidates:
		label, phrase = cand.split('@')
		if label not in added_cates:
			added_cates[label] = 0
		if added_cates[label] >= cate_lim:
			continue
		if len(seeds_map[label]) >= 3:
			continue
		extended_seeds.add(cand)
		added_cates[label] += 1
		added += 1
		if added > limit:
			break

	print 'extended: ' + str(extended_seeds)
	return extended_seeds
Code example #22
def model_eval(eps, sigma, N, kl_reg):
    """
    Construct and evaluate all models of the double-well
    1. Analytic normal form model
    2. PDF fitting without Kramers-Moyal average
    3. Full Langevin regression
    """
    ### Generate data
    x_eq = np.sqrt(eps)  # Equilibrium value

    edges = np.linspace(-2 * x_eq, 2 * x_eq, N + 1)
    centers = 0.5 * (edges[:-1] + edges[1:])
    dx = centers[1] - centers[0]

    dt = 1e-2
    tmax = int(1e5)
    t, X = jl.run_sim(eps, sigma, dt, tmax)
    X, V = X[0, :], X[1, :]

    # PDF of states
    p_hist = np.histogram(X, edges, density=True)[0]

    # Dwell-time slope
    b, b_err = dwell_stats(X, x_eq, dt)
    print("\tData: ", b, b_err)

    ### 1. Normal form
    lamb1 = -1 + np.sqrt(1 + eps)
    lamb2 = -1 - np.sqrt(1 + eps)
    h = -lamb1 / lamb2
    mu = -(1 + h)**2 * lamb1 / eps

    _, phi1 = jl.run_nf(lamb1, mu, sigma / (2 * np.sqrt(1 + eps)), dt, tmax)
    X_nf = (1 + h) * phi1[0, :]

    # Statistics
    p_nf = np.histogram(X_nf, edges, density=True)[0]
    b_nf, b_nf_err = dwell_stats(X_nf, x_eq, dt)
    print("\tNormal form: ", b_nf, b_nf_err)

    ### 2. PDF fit
    Xi = fit_pdf(X, edges, p_hist, dt, p0=[1, lamb1 / sigma**2, mu / sigma**2])
    #print(Xi)

    # Monte Carlo evaluation
    _, X_pdf = jl.run_nf(Xi[0], Xi[1], Xi[2], dt, tmax)
    X_pdf = X_pdf[0, :]

    # Statistics
    p_pdf = np.histogram(X_pdf, edges, density=True)[0]
    b_pdf, b_pdf_err = dwell_stats(X_pdf, x_eq, dt)
    print("\tPDF fit: ", b_pdf, b_pdf_err)

    ### 3. Langevin regression
    Xi = langevin_regression(X, edges, p_hist, dt, stride=200, kl_reg=kl_reg)
    #print(Xi)

    # Monte Carlo evaluation
    _, X_lr = jl.run_nf(Xi[0], Xi[1], Xi[2], dt, tmax)
    X_lr = X_lr[0, :]

    # Statistics
    p_lr = np.histogram(X_lr, edges, density=True)[0]
    b_lr, b_lr_err = dwell_stats(X_lr, x_eq, dt)
    print("\tLangevin regression: ", b_lr, b_lr_err)

    ### KL-divergence of all models against true data
    KL_nf = utils.kl_divergence(p_hist, p_nf, dx=dx, tol=1e-6)
    KL_pdf = utils.kl_divergence(p_hist, p_pdf, dx=dx, tol=1e-6)
    KL_lr = utils.kl_divergence(p_hist, p_lr, dx=dx, tol=1e-6)
    print("\tKL div: ", KL_nf, KL_pdf, KL_lr)

    return [b, b_nf, b_pdf, b_lr], [KL_nf, KL_pdf, KL_lr]
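
The Langevin-regression examples call utils.kl_divergence(p_hist, p_est, dx=..., tol=...) on histogram densities rather than on distribution objects. A rough stand-in consistent with that call signature, offered only as a sketch (dx taken as the grid spacing, tol as a floor so empty bins do not blow up):

import numpy as np

def kl_divergence(p, q, dx=1.0, tol=1e-6):
    # Approximate KL between two densities sampled on a uniform grid:
    # sum_i p_i * log(p_i / q_i) * dx, with both densities floored at tol.
    p = np.clip(np.asarray(p, dtype=float), tol, None)
    q = np.clip(np.asarray(q, dtype=float), tol, None)
    return float(np.sum(p * np.log(p / q)) * dx)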
Code example #23
def reweight_test(embs, dp_file):

	source_type = 'd'
	target_type = 'l'

	target_embs = embs[target_type]
	pred_label = {}
	doc_score = {}
	ratio = 1

	for doc in embs[source_type]:
		doc_emb = embs[source_type][doc]
		sim_map = classify_doc(doc_emb, target_embs)
		pred_label[doc] = hier.get_node(sim_map[0][0]).get_ascendant(1).path
		doc_score[doc] = sim_map[0][1]

	doc_score = sorted(doc_score.items(), key=operator.itemgetter(1), reverse=True)	
	filtered_docs = doc_score[:int(len(doc_score)*ratio)]

	top_labels = [w.path for w in hier.get_nodes_at_level(1)]

	# print meta stats
	top_label_cnts = {}
	for label in top_labels:
		top_label_cnts[label] = 0
	for doc_pair in filtered_docs:
		l = pred_label[doc_pair[0]]
		top_label_cnts[l] += 1
	print top_label_cnts
	print 'top level labels: ' + str(top_labels)
	# return

	label_to_idx = {}
	for idx, label in enumerate(top_labels):
		label_to_idx[label] = idx
	uniform_vec = [1.0/len(top_labels)] * len(top_labels)
	print uniform_vec
	label_to_doc = {}

	# new_filter = []
	# new_pred_ls = {}
	# for (doc, score) in filtered_docs:
	# 	if pred_label[doc] not in top_labels:
	# 		continue
	# 	new_filter.append((doc, score))
	# 	new_pred_ls[doc] = pred_label[doc]
	# filtered_docs = new_filter
	# pred_label = new_pred_ls

	pd_map = load_dp(dp_file, reverse=True)
	
	for label in top_labels:
		label_to_doc[label] = set()

	print 'used docs in reweighting: ' + str(len(filtered_docs))
	for (doc, score) in filtered_docs:
		label_to_doc[pred_label[doc]].add(doc)

	distinct_map = {}
	cnt = 0
	for phrase in embs['p']:
		p_vec = [0.0] * len(top_labels)

		if len(pd_map[phrase]) < 100:
			continue

		for doc in pd_map[phrase]:
			if doc not in pred_label:
				continue
			idx = label_to_idx[pred_label[doc]]
			p_vec[idx] += 1.0
		
		if sum(p_vec) == 0:
			continue

		p_vec = utils.l1_normalize(p_vec)

		kl = utils.kl_divergence(p_vec, uniform_vec)
		distinct_map[phrase] = kl

	distinct_map = sorted(distinct_map.items(), key=operator.itemgetter(1), reverse=False)
	print distinct_map[:100]
	print 
	print distinct_map[:-100]
Code example #24
def objective(vae_model,
              classification_model,
              device,
              x,
              mask,
              label,
              beta,
              nu,
              K=10,
              kappa=1.0,
              components=False):
    """Computes E_{p(x)}[ELBO_{\alpha,\beta}] """
    types = vae_model.disease_types
    qz_x, px_z, zs = vae_model(x, K)

    # compute supervised loss
    bce = torch.nn.BCELoss()
    pred = classification_model(zs.squeeze(0))
    supervised_loss = bce(pred, label)

    # compute vae loss
    lpx_z = px_z.log_prob(x).view(*px_z.batch_shape[:2], -1).sum(-1)
    pz = vae_model.pz(*vae_model.pz_params)
    kld = kl_divergence(qz_x, pz, samples=zs).sum(-1)

    # compute kl(p(z), q(z))
    B, D = qz_x.loc.shape
    _zs = pz.rsample(torch.Size([B]))
    lpz = pz.log_prob(_zs).sum(-1).squeeze(-1)

    _zs_expand = _zs.expand(B, B, D)
    lqz = qz_x.log_prob(_zs_expand).sum(-1)  #B*B

    qz = []
    _max = torch.max(lqz)
    for i in range(types):
        ds = lqz[:, (mask == i)] - _max
        qz_j = torch.exp(ds)  #B*k
        qz.append((qz_j * beta[i]))
    qz = torch.cat(qz, dim=1).to(device)
    lqz = torch.sum(qz, dim=1)
    lqz = _max + torch.log(lqz) - math.log(qz.size(1))

    inc_kld = lpz - lqz
    inc_kld = inc_kld.mean(0, keepdim=True).expand(1, B)
    inc_kld = inc_kld.mean(0).sum() / B

    # compute kl(q(z), N(z))
    _zs = qz_x.rsample(torch.Size([B]))  #B*B*D
    lqz = qz_x.log_prob(_zs)  #B*B*D
    kld2 = []
    for i in range(types):
        tmp_zs = _zs[:, mask == i, :]  #B*k*D
        lnj = dist.Normal(vae_model.pz_params[0][i],
                          vae_model.pz_params[1][i]).log_prob(tmp_zs)
        _kl = (tmp_zs - lnj).mean(0)  #k*D
        kld2.append((_kl * nu[i]).mean(0))
    kld2 = torch.stack(kld2).to(device)
    kld2 = torch.sum(kld2)

    obj = supervised_loss - lpx_z + kld + kappa * inc_kld + kld2
    return obj.sum() if not components else (obj.sum(), supervised_loss.sum(),
                                             lpx_z.sum(), kld.sum(),
                                             inc_kld.sum(), kld2.sum())
Code example #25
def expan(embs, l_prel_file, dp_file, lp_file, mode='EMB'):
	# the part to verify iterative expansion
	# Mode = EMB: meaning that the similarity is learned from embedding
	# Mode = DIS: meaning that the similarity is from L-P assignment

	target_type = 'p'
	source_type = 'l'
	multiplier = 5
	thre_softmax = 0.5

	ori_embs = embs
	agg_embs = copy.copy(embs)
	pd_map = load_dp(dp_file, reverse=True)
	dp_map = load_edge_map(dp_file)
	lp_map = load_edge_map(lp_file)
	dist_map = {x:1 for x in embs[target_type]}
	vec_size = 0
	for d in ori_embs[target_type]:
		vec_size = len(ori_embs[target_type][d])
		break

	seeds_map = {}	# label : seed set
	all_seeds = set()
	with open(l_prel_file, 'r') as f:
		for line in f:
			segs = line.strip('\r\n').split('\t')
			if segs[1] == '*':
				continue
			seeds_map[segs[1]] = set()
			seeds_map[segs[1]].add(segs[2].lower())
			all_seeds.add(segs[2].lower())	

	print '*********** Direct Embedding'
	evaluate(ori_embs, true_file, target_dim)

	agg_embs[source_type] = weighted_avg_embedding(lp_map, agg_embs[target_type], dist_map, vec_size)
	agg_embs['d'] = weighted_avg_embedding(dp_map, agg_embs[target_type], dist_map, vec_size)

	print '*********** Aggregate without expansion'
	evaluate(agg_embs, true_file, target_dim)

	for i in range(2):
		print '======== iter ' + str(i) + ' of expansion.'
		extended_seeds = expan_round(agg_embs, seeds_map, all_seeds, 3, 1, mode=mode, pd_map=pd_map)
		print '============= seeds expanded'

		for seed in extended_seeds:
			label, phrase = seed.split('@')
			if label not in lp_map or phrase in lp_map[label]:
				print 'ERRRROR!!! ' + seed
			all_seeds.add(phrase.lower())
			seeds_map[label].add(phrase.lower())
			lp_map[label][phrase] = 1

		agg_embs[source_type] = weighted_avg_embedding(lp_map, agg_embs[target_type], dist_map, vec_size)

		print '*********** Aggregate with expansion at iter ' + str(i)
		evaluate(agg_embs, true_file, target_dim)

	normal = False
	source_type = 'd'
	target_type = 'l'
	mid_type = 'p'

	for i in range(2):

		if i > 0:
			normal = True

		print '============= iter ' + str(i) + ' of dist started.'

		pred_label, doc_score = doc_assignment(agg_embs, 'd', 'l')
		top_labels = [w.path for w in hier.get_nodes_at_level(1)]

		print '============= docs assigned to labels'

		# # print meta stats
		# top_label_cnts = {}
		# for label in top_labels:
		# 	top_label_cnts[label] = 0
		# for doc_pair in filtered_docs:
		# 	l = pred_label[doc_pair[0]]
		# 	top_label_cnts[l] += 1
		# print top_label_cnts
		# print 'top level labels: ' + str(top_labels)

		label_to_idx = {}
		for idx, label in enumerate(top_labels):
			label_to_idx[label] = idx
		uniform_vec = [1.0/len(top_labels)] * len(top_labels)
		# print uniform_vec
		label_to_doc = {}
		
		for label in top_labels:
			label_to_doc[label] = set()


		docs_used = {}

		if normal:
			print 'used docs in reweighting: ' + str(len(pred_label))
			for doc, score in doc_score.iteritems():
				label_to_doc[pred_label[doc]].add(doc)
		else:
			for label in top_labels:
				p = label.lower()
				# idx = label_to_idx[label]
				for doc in pd_map[p]:
					label_to_doc[label].add(doc)
					if doc not in docs_used:
						docs_used[doc] = set()
					docs_used[doc].add(label)
			print 'docs used: %d' % len(docs_used)


		cnt_vec = [0.0] * len(top_labels)
		for label in label_to_doc:
			cnt_vec[label_to_idx[label]] = len(label_to_doc[label])
		comp_vec = utils.l1_normalize(cnt_vec)

		print cnt_vec

		# print comp_vec

		distinct_map = {}
		
		if normal:
			for phrase in embs[mid_type]:
				p_vec = [0.0] * len(top_labels)

				# if len(pd_map[phrase]) < 100:
				# 	continue

				for doc in pd_map[phrase]:
					idx = label_to_idx[pred_label[doc]]
					p_vec[idx] += 1.0
				
				if sum(p_vec) == 0:
					print 'ERROR!!!!!!!!!!'
					continue

				p_vec = utils.l1_normalize(p_vec)

				# kl = 0.1 + 0.9 * utils.kl_divergence(p_vec, uniform_vec)
				kl = utils.kl_divergence(p_vec, uniform_vec)
				# kl = utils.kl_divergence(p_vec, comp_vec)
				distinct_map[phrase] = kl
		else:
			for phrase in embs[mid_type]:
				p_vec = [0.0] * len(top_labels)

				# if len(pd_map[phrase]) < 100:
				# 	continue

				for doc in pd_map[phrase]:
					if doc in docs_used:
						for label in docs_used[doc]:
							idx = label_to_idx[label]
							p_vec[idx] += 1.0

				# print p_vec
				
				if sum(p_vec) == 0:
					distinct_map[phrase] = 0
					# print 'ERROR!!!!!!!!!!'
					continue
				
				# p_vec = [x / cnt_vec[i] for i, x in enumerate(p_vec)]


				p_vec = utils.l1_normalize(p_vec)

				# kl = 0.1 + 0.9 * utils.kl_divergence(p_vec, uniform_vec)
				# kl = utils.kl_divergence(p_vec, uniform_vec)
				kl = utils.kl_divergence(p_vec, comp_vec)
				distinct_map[phrase] = kl

		dist_map = distinct_map
		with open('focal_comp.txt', 'w+') as g:
			for (ph, score) in sorted(dist_map.items(), key=operator.itemgetter(1), reverse=True):
				g.write('%s,%f\t' % (ph, score))

		print '============= phrase distinctness computed.'

		agg_embs[source_type] = weighted_avg_embedding(dp_map, agg_embs[mid_type], dist_map, vec_size)
		print '============= doc embedding aggregated.'

		print '*********** Aggregate with distinct at iter ' + str(i)
		evaluate(agg_embs, true_file, target_dim)


	return
Code example #26
File: vae_objectives.py Project: v1xerunt/LEMONADE
def objective(vae_model,
              c_model,
              device,
              x,
              mask,
              types,
              label,
              beta,
              nu,
              K=10,
              kappa=1.0,
              components=False):
    """Computes E_{p(x)}[ELBO_{\alpha,\beta}] """
    types = vae_model.disease_types
    qz_x, px_z, zs = vae_model(x, K)

    # compute supervised loss
    pred = c_model(zs.squeeze(0), device)
    bce = torch.nn.BCELoss(reduction='none')
    onehot_label = torch.zeros((x.size(0), types), dtype=torch.float32)
    for i in range(len(x)):
        for j in range(len(mask[i])):
            onehot_label[i][mask[i][j]] = 1
    supervised_loss = bce(pred, onehot_label)

    # compute vae loss
    lpx_z = px_z.log_prob(x).view(*px_z.batch_shape[:2], -1).sum(-1)
    pz = vae_model.pz(*vae_model.pz_params)
    kld = kl_divergence(qz_x, pz, samples=zs).sum(-1)

    # compute kl(p(z), q(z))
    B, D = qz_x.loc.shape
    _zs = pz.rsample(torch.Size([B]))
    lpz = pz.log_prob(_zs).sum(-1).squeeze(-1)

    _zs_expand = _zs.expand(B, B, D)
    lqz = qz_x.log_prob(_zs_expand).sum(-1)  #B*B

    #    qz = []
    #    _max = torch.max(lqz)
    #    for i in range(types):
    #        tmp_mask = torch.sum(mask == i,dim=-1).bool()
    #        ds = lqz[:, tmp_mask] - _max
    #        qz_j = torch.exp(ds)  #B*k
    #        qz.append((qz_j * beta[i]))
    #    qz = torch.cat(qz, dim=1).to(device)
    #    lqz = torch.sum(qz, dim=1)
    #    lqz = _max + torch.log(lqz) - math.log(qz.size(1))
    lqz = log_mean_exp(lqz, dim=1)

    inc_kld = lpz - lqz
    inc_kld = inc_kld.mean(0, keepdim=True).expand(1, B)
    inc_kld = inc_kld.mean(0).sum() / B

    # compute kl(q(z), N(z))
    _zs = qz_x.rsample(torch.Size([B]))  #B*B*D
    lqz = qz_x.log_prob(_zs)  #B*B*D
    kld2 = []
    for i in range(types):
        tmp_mask = torch.sum(mask == i, dim=-1).bool()
        if torch.sum(tmp_mask) == 0:
            continue
        tmp_zs = _zs[:, tmp_mask, :]  #B*k*D
        lnj = dist.Normal(vae_model.pz_params[0][i],
                          vae_model.pz_params[1][i]).log_prob(tmp_zs)
        _kl = (tmp_zs - lnj).mean(0)  #k*D
        kld2.append((_kl * nu[i]).mean(0))
    kld2 = torch.stack(kld2).to(device)
    kld2 = torch.sum(kld2)

    obj = supervised_loss.sum(
        dim=-1) - lpx_z + kld.mean() + kappa * inc_kld + kld2
    return obj.mean() if not components else (
        obj.mean(), supervised_loss.mean(dim=0), lpx_z.mean(), kld.mean(),
        inc_kld.mean(), kld2.mean(), pred.cpu().detach().numpy(),
        onehot_label.cpu().detach().numpy())