def __init__(self, vocab_size, embed_size, time_step, hidden_size, z_dim,
             dropout_rate, bos_idx, eos_idx, pad_idx, n_comb):
    super(KLPF, self).__init__()
    self.time_step = time_step
    self.hidden_size = hidden_size
    self.dropout_rate = dropout_rate
    self.bos_idx = bos_idx
    self.eos_idx = eos_idx
    self.pad_idx = pad_idx
    self.number_combination = n_comb
    self.z_dim = z_dim

    # encoder
    self.encoder = iafEncoder(vocab_size, embed_size, hidden_size, z_dim, pad_idx)

    # iaf
    #self.linIAF = linIAF()
    #self.combination_L = combination_L()
    #self.encoder_y = nn.Linear(hidden_size, self.number_combination)
    #self.encoder_L = nn.Linear(hidden_size, (z_dim**2) * self.number_combination)
    self.flow = flows.myIAF(z_dim, z_dim, hidden_size, 1)
    self.softmax = nn.Softmax(dim=-1)  # explicit dim avoids the implicit-dim deprecation warning
    self.classify = nn.Linear(z_dim, 2)

    # decoder
    self.embedding = nn.Embedding(vocab_size, embed_size, padding_idx=pad_idx)
    self.init_h = nn.Linear(z_dim, hidden_size)  # latent code -> initial LSTM hidden state
    self.init_c = nn.Linear(z_dim, hidden_size)  # latent code -> initial LSTM cell state
    self.rnn = nn.LSTM(embed_size, hidden_size, batch_first=True)

    # output projection to vocabulary logits
    self.output = nn.Linear(hidden_size, vocab_size)
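# Hypothetical helper, not part of the original class: a minimal sketch of how
# the latent code z could seed the decoder LSTM through the init_h / init_c
# layers defined above. The method name, the tanh squashing, and the assumption
# that `torch` is imported in this module are all illustrative choices.
def _init_decoder_state(self, z):
    # z: (batch, z_dim) -> (num_layers=1, batch, hidden_size), the shape nn.LSTM expects
    h0 = torch.tanh(self.init_h(z)).unsqueeze(0)
    c0 = torch.tanh(self.init_c(z)).unsqueeze(0)
    return (h0, c0)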
def __init__(self, config, vocab_size, PAD_token=0):
    super(DFVAE, self).__init__()
    self.vocab_size = vocab_size
    self.maxlen = config['maxlen']
    self.clip = config['clip']
    self.lambda_gp = config['lambda_gp']
    self.temp = config['temp']

    self.embedder = nn.Embedding(vocab_size, config['emb_size'], padding_idx=PAD_token)
    self.utt_encoder = Encoder(self.embedder, config['emb_size'], config['n_hidden'],
                               True, config['n_layers'], config['noise_radius'])
    self.context_encoder = ContextEncoder(self.utt_encoder, config['n_hidden'] * 2 + 2,
                                          config['n_hidden'], 1, config['noise_radius'])

    self.prior_net = Variation(config['n_hidden'], config['z_size'])      # p(e|c)
    self.post_net = Variation(config['n_hidden'] * 3, config['z_size'])   # q(e|c,x)
    #self.prior_highway = nn.Linear(config['n_hidden'], config['n_hidden'])
    #self.post_highway = nn.Linear(config['n_hidden'] * 3, config['n_hidden'])

    # posterior flow: three myIAF blocks, composed below into post_generator
    self.postflow1 = flow.myIAF(config['z_size'], config['z_size'] * 2, config['n_hidden'], 3)
    self.postflow2 = flow.myIAF(config['z_size'], config['z_size'] * 2, config['n_hidden'], 3)
    self.postflow3 = flow.myIAF(config['z_size'], config['z_size'] * 2, config['n_hidden'], 3)
    # prior flow: three IAF blocks, composed below into prior_generator
    self.priorflow1 = flow.IAF(config['z_size'], config['z_size'] * 2, config['n_hidden'], 3)
    self.priorflow2 = flow.IAF(config['z_size'], config['z_size'] * 2, config['n_hidden'], 3)
    self.priorflow3 = flow.IAF(config['z_size'], config['z_size'] * 2, config['n_hidden'], 3)
    self.post_generator = nn_.SequentialFlow(self.postflow1, self.postflow2, self.postflow3)
    self.prior_generator = nn_.SequentialFlow(self.priorflow1, self.priorflow2, self.priorflow3)

    self.decoder = Decoder(self.embedder, config['emb_size'],
                           config['n_hidden'] + config['z_size'], vocab_size, n_layers=1)

    self.optimizer_AE = optim.SGD(list(self.context_encoder.parameters())
                                  + list(self.post_net.parameters())
                                  + list(self.post_generator.parameters())
                                  + list(self.decoder.parameters())
                                  + list(self.prior_net.parameters())
                                  + list(self.prior_generator.parameters())
                                  #+ list(self.prior_highway.parameters())
                                  #+ list(self.post_highway.parameters())
                                  , lr=config['lr_ae'])
    self.optimizer_G = optim.RMSprop(list(self.post_net.parameters())
                                     + list(self.post_generator.parameters())
                                     + list(self.prior_net.parameters())
                                     + list(self.prior_generator.parameters())
                                     #+ list(self.prior_highway.parameters())
                                     #+ list(self.post_highway.parameters())
                                     , lr=config['lr_gan_g'])
    #self.optimizer_D = optim.RMSprop(self.discriminator.parameters(), lr=config['lr_gan_d'])

    self.lr_scheduler_AE = optim.lr_scheduler.StepLR(self.optimizer_AE, step_size=10, gamma=0.6)

    self.criterion_ce = nn.CrossEntropyLoss()
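# Illustrative configuration sketch, not taken from the original repository:
# only the keys below are actually read by DFVAE.__init__ above; every value is
# an assumption chosen purely for demonstration ('lr_gan_d' is omitted because
# it is only needed if the commented-out optimizer_D is re-enabled).
example_config = {
    'maxlen': 40, 'clip': 1.0, 'lambda_gp': 10, 'temp': 1.0,
    'emb_size': 300, 'n_hidden': 300, 'n_layers': 1, 'noise_radius': 0.2,
    'z_size': 200, 'lr_ae': 1.0, 'lr_gan_g': 5e-5,
}
# model = DFVAE(example_config, vocab_size=20000)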