def __init__(self, vocab_size, embed_size, time_step, hidden_size, z_dim,
             dropout_rate, bos_idx, eos_idx, pad_idx, n_comb):
        """Wire up the KLPF model's sub-modules.

        Builds an IAF-based encoder, a single IAF flow over the latent
        code, a 2-way linear classifier on z, and an LSTM decoder whose
        initial (h, c) states are projected from z.

        NOTE(review): ``nn.Softmax()`` is constructed without an explicit
        ``dim``; PyTorch warns on implicit-dim softmax — confirm the
        intended axis at the call site before pinning one here.
        """
        super(KLPF, self).__init__()

        # Hyper-parameters and special token ids, kept for later use.
        self.time_step = time_step
        self.hidden_size = hidden_size
        self.dropout_rate = dropout_rate
        self.z_dim = z_dim
        self.bos_idx = bos_idx
        self.eos_idx = eos_idx
        self.pad_idx = pad_idx
        self.number_combination = n_comb

        # Encoder producing the base latent code.
        self.encoder = iafEncoder(vocab_size, embed_size, hidden_size,
                                  z_dim, pad_idx)

        # One inverse autoregressive flow step over z, plus a 2-class
        # linear head on the latent code.
        self.flow = flows.myIAF(z_dim, z_dim, hidden_size, 1)
        self.softmax = nn.Softmax()
        self.classify = nn.Linear(z_dim, 2)

        # Decoder: token embedding -> single-layer LSTM; z is projected
        # into the LSTM's initial hidden and cell states.
        self.embedding = nn.Embedding(vocab_size, embed_size,
                                      padding_idx=pad_idx)
        self.init_h = nn.Linear(z_dim, hidden_size)
        self.init_c = nn.Linear(z_dim, hidden_size)
        self.rnn = nn.LSTM(embed_size, hidden_size, batch_first=True)

        # Projection from hidden state to vocabulary logits.
        self.output = nn.Linear(hidden_size, vocab_size)
    def __init__(self, config, vocab_size, PAD_token=0):
        """Wire up the DFVAE model from a `config` dict of hyper-parameters.

        Builds an utterance/context encoder pair, prior and posterior
        variation networks, three-step IAF flows for each (posterior
        flows are `myIAF`, prior flows are `IAF`), a decoder conditioned
        on [context; z], plus the AE (SGD) and generator (RMSprop)
        optimizers and an LR scheduler for the AE optimizer.

        NOTE(review): assumes `config` carries the keys read below
        ('maxlen', 'clip', 'lambda_gp', 'temp', 'emb_size', 'n_hidden',
        'n_layers', 'noise_radius', 'z_size', 'lr_ae', 'lr_gan_g').
        """
        super(DFVAE, self).__init__()
        self.vocab_size = vocab_size
        self.maxlen = config['maxlen']
        self.clip = config['clip']
        self.lambda_gp = config['lambda_gp']
        self.temp = config['temp']

        # Hoist the config lookups used repeatedly below.
        emb_size = config['emb_size']
        n_hidden = config['n_hidden']
        z_size = config['z_size']
        noise_radius = config['noise_radius']

        self.embedder = nn.Embedding(vocab_size, emb_size,
                                     padding_idx=PAD_token)
        self.utt_encoder = Encoder(self.embedder, emb_size, n_hidden,
                                   True, config['n_layers'], noise_radius)
        # Context sees both encoder directions (2 * n_hidden) plus two
        # extra feature slots.
        self.context_encoder = ContextEncoder(self.utt_encoder,
                                              n_hidden * 2 + 2,
                                              n_hidden, 1, noise_radius)
        self.prior_net = Variation(n_hidden, z_size)        # p(e|c)
        self.post_net = Variation(n_hidden * 3, z_size)     # q(e|c,x)

        # Three flow steps each for the posterior and the prior; the
        # individual steps are kept as attributes and then chained.
        self.postflow1 = flow.myIAF(z_size, z_size * 2, n_hidden, 3)
        self.postflow2 = flow.myIAF(z_size, z_size * 2, n_hidden, 3)
        self.postflow3 = flow.myIAF(z_size, z_size * 2, n_hidden, 3)
        self.priorflow1 = flow.IAF(z_size, z_size * 2, n_hidden, 3)
        self.priorflow2 = flow.IAF(z_size, z_size * 2, n_hidden, 3)
        self.priorflow3 = flow.IAF(z_size, z_size * 2, n_hidden, 3)

        self.post_generator = nn_.SequentialFlow(self.postflow1,
                                                 self.postflow2,
                                                 self.postflow3)
        self.prior_generator = nn_.SequentialFlow(self.priorflow1,
                                                  self.priorflow2,
                                                  self.priorflow3)

        # Decoder is conditioned on the context concatenated with z.
        self.decoder = Decoder(self.embedder, emb_size,
                               n_hidden + z_size, vocab_size, n_layers=1)

        # Autoencoder optimizer covers the full encode/flow/decode path.
        ae_params = (list(self.context_encoder.parameters())
                     + list(self.post_net.parameters())
                     + list(self.post_generator.parameters())
                     + list(self.decoder.parameters())
                     + list(self.prior_net.parameters())
                     + list(self.prior_generator.parameters()))
        self.optimizer_AE = optim.SGD(ae_params, lr=config['lr_ae'])

        # Generator optimizer covers only the variation nets and flows.
        gen_params = (list(self.post_net.parameters())
                      + list(self.post_generator.parameters())
                      + list(self.prior_net.parameters())
                      + list(self.prior_generator.parameters()))
        self.optimizer_G = optim.RMSprop(gen_params, lr=config['lr_gan_g'])

        self.lr_scheduler_AE = optim.lr_scheduler.StepLR(self.optimizer_AE,
                                                         step_size=10,
                                                         gamma=0.6)

        self.criterion_ce = nn.CrossEntropyLoss()