def __init__(self, hid_dim, latent_dim, dec_layers, dropout, dec_max_len, beam_size, WEAtt_type,
             decoder_emb, pad_id):
    super(LM, self).__init__()

    self.voc_size = decoder_emb.num_embeddings
    self.emb_dim = decoder_emb.embedding_dim
    self.hid_dim = hid_dim
    self.dec_layers = dec_layers
    self.dropout = dropout
    self.n_dir = 2  # Kept for dimension consistency.
    self.dec_max_len = dec_max_len
    self.beam_size = beam_size
    self.WEAtt_type = WEAtt_type
    self.latent_dim = latent_dim  # Unused; kept only for interface consistency.

    self.Decoder = Decoder(voc_size=self.voc_size,
                           latent_dim=self.latent_dim,
                           emb_dim=self.emb_dim,
                           hid_dim=self.hid_dim * self.n_dir,
                           n_layer=self.dec_layers,
                           dropout=self.dropout,
                           max_len=self.dec_max_len,
                           beam_size=self.beam_size,
                           WEAtt_type=self.WEAtt_type,
                           embedding=decoder_emb)
    self.criterionSeq = SeqLoss(voc_size=self.voc_size, pad=pad_id, end=None, unk=None)
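
# A minimal instantiation sketch for the LM wrapper above. The hyperparameter values and the
# WEAtt_type string are illustrative placeholders (valid WEAtt_type options depend on the
# Decoder implementation); voc_size and emb_dim are inferred from decoder_emb.
import torch.nn as nn

decoder_emb = nn.Embedding(num_embeddings=30000, embedding_dim=300, padding_idx=0)
lm = LM(hid_dim=500, latent_dim=200, dec_layers=1, dropout=0.1,
        dec_max_len=30, beam_size=5, WEAtt_type='dot',  # placeholder attention type
        decoder_emb=decoder_emb, pad_id=0)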
def __init__(self, hid_dim, latent_dim, enc_layers, dec_layers, dropout, enc_bi, dec_max_len,
             beam_size, WEAtt_type, encoder_emb, decoder_emb, pad_id):
    super(S_VAE_COUPLE, self).__init__()

    assert encoder_emb.num_embeddings == decoder_emb.num_embeddings
    assert encoder_emb.embedding_dim == decoder_emb.embedding_dim
    self.voc_size = encoder_emb.num_embeddings
    self.emb_dim = encoder_emb.embedding_dim
    self.hid_dim = hid_dim
    self.enc_layers = enc_layers
    self.dec_layers = dec_layers
    self.dropout = dropout
    self.enc_bi = enc_bi
    self.n_dir = 2 if self.enc_bi else 1
    self.dec_max_len = dec_max_len
    self.beam_size = beam_size
    self.WEAtt_type = WEAtt_type
    self.latent_dim = latent_dim

    self.Encoder = Encoder(emb_dim=self.emb_dim,
                           hid_dim=self.hid_dim,
                           n_layer=self.enc_layers,
                           dropout=self.dropout,
                           bi=self.enc_bi,
                           embedding=encoder_emb)
    self.PriorUniform = HypersphericalUniform(dim=self.latent_dim)
    self.PosteriorVMF = VonMisesFisherModule(in_dim=self.hid_dim * self.n_dir * self.enc_layers,
                                             out_dim=self.latent_dim)
    self.PosteriorVMFCouple = VonMisesFisherModule(in_dim=self.hid_dim * self.n_dir * self.enc_layers,
                                                   out_dim=self.latent_dim,
                                                   no_instance=True)
    self.Decoder = Decoder(voc_size=self.voc_size,
                           latent_dim=self.latent_dim,
                           emb_dim=self.emb_dim,
                           hid_dim=self.hid_dim * self.n_dir,
                           n_layer=self.dec_layers,
                           dropout=self.dropout,
                           max_len=self.dec_max_len,
                           beam_size=self.beam_size,
                           WEAtt_type=self.WEAtt_type,
                           embedding=decoder_emb)
    self.DecoderCouple = Decoder(voc_size=self.voc_size,
                                 latent_dim=self.latent_dim,
                                 emb_dim=self.emb_dim,
                                 hid_dim=self.hid_dim * self.n_dir,
                                 n_layer=self.dec_layers,
                                 dropout=self.dropout,
                                 max_len=self.dec_max_len,
                                 beam_size=self.beam_size,
                                 WEAtt_type=self.WEAtt_type,
                                 embedding=decoder_emb)
    self.criterionSeq = SeqLoss(voc_size=self.voc_size, pad=pad_id, end=None, unk=None)
    self.toInit = nn.Sequential(nn.Linear(self.latent_dim, self.emb_dim),
                                nn.ReLU(),
                                nn.Linear(self.emb_dim, self.emb_dim))
    self.toInitCouple = nn.Sequential(nn.Linear(self.latent_dim, self.emb_dim),
                                      nn.ReLU(),
                                      nn.Linear(self.emb_dim, self.emb_dim))
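
# A shape-only sketch of the toInit projection above (sizes are illustrative): a unit-norm
# latent vector, as a vMF sample would be, is mapped into the decoder embedding space before
# decoding starts.
import torch
import torch.nn as nn
import torch.nn.functional as F

B, latent_dim, emb_dim = 8, 64, 300
toInit = nn.Sequential(nn.Linear(latent_dim, emb_dim), nn.ReLU(), nn.Linear(emb_dim, emb_dim))
z = F.normalize(torch.randn(B, latent_dim), dim=-1)  # unit-norm latent, like a vMF draw
init = toInit(z)                                     # (B, emb_dim), used to initialize decoding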
def __init__(self, hid_dim, latent_dim, enc_layers, dec_layers, dropout, enc_bi, dec_max_len,
             beam_size, WEAtt_type, encoder_emb, decoder_emb, pad_id):
    super(VAE, self).__init__()

    assert encoder_emb.num_embeddings == decoder_emb.num_embeddings
    assert encoder_emb.embedding_dim == decoder_emb.embedding_dim
    self.voc_size = encoder_emb.num_embeddings
    self.emb_dim = encoder_emb.embedding_dim
    self.hid_dim = hid_dim
    self.enc_layers = enc_layers
    self.dec_layers = dec_layers
    self.dropout = dropout
    self.enc_bi = enc_bi
    self.n_dir = 2 if self.enc_bi else 1
    self.dec_max_len = dec_max_len
    self.beam_size = beam_size
    self.WEAtt_type = WEAtt_type
    self.latent_dim = latent_dim

    self.PostEncoder = Encoder(emb_dim=self.emb_dim,
                               hid_dim=self.hid_dim,
                               n_layer=self.enc_layers,
                               dropout=self.dropout,
                               bi=self.enc_bi,
                               embedding=encoder_emb)
    self.RespEncoder = Encoder(emb_dim=self.emb_dim,
                               hid_dim=self.hid_dim,
                               n_layer=self.enc_layers,
                               dropout=self.dropout,
                               bi=self.enc_bi,
                               embedding=encoder_emb)
    self.PriorGaussian = Gaussian(in_dim=self.hid_dim * self.n_dir * self.enc_layers,
                                  out_dim=self.latent_dim)
    self.PosteriorGaussian = Gaussian(in_dim=2 * self.hid_dim * self.n_dir * self.enc_layers,
                                      out_dim=self.latent_dim)
    self.PostRepr = nn.Linear(self.hid_dim * self.n_dir * self.enc_layers, self.emb_dim)
    self.Decoder = Decoder(voc_size=self.voc_size,
                           latent_dim=self.latent_dim,
                           emb_dim=self.emb_dim,
                           hid_dim=self.hid_dim * self.n_dir,
                           n_layer=self.dec_layers,
                           dropout=self.dropout,
                           max_len=self.dec_max_len,
                           beam_size=self.beam_size,
                           WEAtt_type=self.WEAtt_type,
                           embedding=decoder_emb)
    self.BoW = nn.Linear(self.latent_dim, self.voc_size)
    self.criterionSeq = SeqLoss(voc_size=self.voc_size, pad=pad_id, end=None, unk=None)
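
# A shape-only sketch of where the 2 * hid_dim * n_dir * enc_layers posterior input comes from,
# assuming each Encoder exposes a final hidden state shaped (n_layer * n_dir, B, hid_dim) as a
# stacked (bi)directional RNN would; the actual Encoder return format may differ.
import torch

B, hid_dim, n_dir, enc_layers = 8, 500, 2, 1
post_state = torch.randn(enc_layers * n_dir, B, hid_dim)  # from PostEncoder
resp_state = torch.randn(enc_layers * n_dir, B, hid_dim)  # from RespEncoder
prior_in = post_state.transpose(0, 1).reshape(B, -1)      # (B, hid_dim * n_dir * enc_layers) -> PriorGaussian
post_in = torch.cat([prior_in, resp_state.transpose(0, 1).reshape(B, -1)], dim=-1)
assert post_in.size(-1) == 2 * hid_dim * n_dir * enc_layers  # matches PosteriorGaussian's in_dim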
def __init__(self, hid_dim, latent_dim, enc_layers, dec_layers, dropout, enc_bi, dec_max_len,
             beam_size, WEAtt_type, encoder_emb, decoder_emb, pad_id, n_flows, flow_type):
    super(VAE_NF, self).__init__()

    assert encoder_emb.num_embeddings == decoder_emb.num_embeddings
    assert encoder_emb.embedding_dim == decoder_emb.embedding_dim
    self.voc_size = encoder_emb.num_embeddings
    self.emb_dim = encoder_emb.embedding_dim
    self.hid_dim = hid_dim
    self.enc_layers = enc_layers
    self.dec_layers = dec_layers
    self.dropout = dropout
    self.enc_bi = enc_bi
    self.n_dir = 2 if self.enc_bi else 1
    self.dec_max_len = dec_max_len
    self.beam_size = beam_size
    self.WEAtt_type = WEAtt_type
    self.latent_dim = latent_dim
    self.n_flows = n_flows
    self.flow_type = flow_type

    self.Encoder = Encoder(emb_dim=self.emb_dim,
                           hid_dim=self.hid_dim,
                           n_layer=self.enc_layers,
                           dropout=self.dropout,
                           bi=self.enc_bi,
                           embedding=encoder_emb)
    self.PriorGaussian = torch.distributions.Normal(
        gpu_wrapper(torch.zeros(self.latent_dim)),
        gpu_wrapper(torch.ones(self.latent_dim)))
    self.PosteriorGaussian = Gaussian(in_dim=self.hid_dim * self.n_dir * self.enc_layers,
                                      out_dim=self.latent_dim)
    self.Decoder = Decoder(voc_size=self.voc_size,
                           latent_dim=self.latent_dim,
                           emb_dim=self.emb_dim,
                           hid_dim=self.hid_dim * self.n_dir,
                           n_layer=self.dec_layers,
                           dropout=self.dropout,
                           max_len=self.dec_max_len,
                           beam_size=self.beam_size,
                           WEAtt_type=self.WEAtt_type,
                           embedding=decoder_emb)
    self.BoW = nn.Linear(self.latent_dim, self.voc_size)
    self.Flows = NormalizingFlows(cond_dim=self.hid_dim * self.n_dir * self.enc_layers,
                                  latent_dim=self.latent_dim,
                                  n_flows=self.n_flows,
                                  flow_type=self.flow_type)
    self.criterionSeq = SeqLoss(voc_size=self.voc_size, pad=pad_id, end=None, unk=None)
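
# A small sketch of sampling and scoring under the factorized standard-Normal prior built above,
# using only torch.distributions; gpu_wrapper is assumed to simply move tensors onto the GPU when
# one is available, so it is omitted here.
import torch

B, latent_dim = 8, 64  # illustrative sizes
prior = torch.distributions.Normal(torch.zeros(latent_dim), torch.ones(latent_dim))
z = prior.sample((B,))                 # (B, latent_dim) draws from N(0, I)
log_p = prior.log_prob(z).sum(dim=-1)  # (B,) log-density under the factorized prior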
def train(self):
    # Logging.
    if config.use_tensorboard:
        self.build_tensorboard()

    # Set trainable parameters, according to the frozen parameter list.
    for scope in self.scopes.keys():
        trainable = []
        for module in self.scopes[scope]:
            if getattr(self, module) is not None:
                for k, v in getattr(self, module).state_dict(keep_vars=True).items():
                    # k is the parameter name; v is the parameter value.
                    if v.requires_grad:
                        trainable.append(v)
                        print("[{} Trainable:]".format(module), k)
                    else:
                        print("[{} Frozen:]".format(module), k)
        setattr(self,
                scope + '_optim',
                Adam(params=trainable,
                     lr=getattr(self, scope + '_lr'),
                     betas=[config.beta1, config.beta2],
                     weight_decay=config.weight_decay))
        setattr(self, 'trainable_' + scope, trainable)

    # Build criterion.
    self.criterionSeq = SeqLoss(voc_size=self.train_set.vocab.size,
                                pad=self.train_set.pad,
                                end=self.train_set.eos,
                                unk=self.train_set.unk)
    # self.criterionReward = RewardCriterion()
    # self.criterionGAN = GANLoss(config.gan_type)
    # self.criterionCls = F.binary_cross_entropy_with_logits
    # self.criterionBernKL = F.binary_cross_entropy

    # Train.
    epoch = 0
    # self.evaluate_dialogue('test')
    # self.evaluate_diversity('test', beam_size=1, save=True)
    try:
        while True:
            self.train_epoch(epoch_idx=epoch)
            epoch += 1
            if self.iter_num >= config.num_iters:
                break
    except KeyboardInterrupt:
        print('-' * 100)
        print('Quit training.')
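
# A sketch of how a module could be frozen before calling train(), so that the requires_grad
# filter above leaves its tensors out of the scope's optimizer. `solver` and its `Encoder`
# attribute are hypothetical names used only for illustration.
for p in solver.Encoder.parameters():
    p.requires_grad = False
# During train(), state_dict(keep_vars=True) then reports these tensors as "[... Frozen:]"
# and they are not passed to Adam.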
def train(self):
    # Logging.
    if config.use_tensorboard:
        self.build_tensorboard()

    # Load pretrained.
    if config.train_mode == 'cls-only':
        pass
    elif config.train_mode == 'pto':
        self.restore_pretrained(['Classifier', 'Emb', 'Aux_Classifier', 'Aux_Emb'])
    elif config.train_mode == 'aux-cls-only':
        pass
    else:
        raise ValueError()

    # Set trainable parameters, according to the frozen parameter list.
    for scope in self.scopes.keys():
        trainable = []
        for module in self.scopes[scope]:
            for k, v in getattr(self, module).state_dict(keep_vars=True).items():
                # k is the parameter name; v is the parameter value.
                if v.requires_grad:
                    trainable.append(v)
                    print("[{} Trainable:]".format(module), k)
                else:
                    print("[{} Frozen:]".format(module), k)
        setattr(self,
                scope + '_optim',
                Adam(trainable, getattr(self, scope + '_lr'), [config.beta1, config.beta2]))
        setattr(self, 'trainable_' + scope, trainable)

    # Build criterion.
    self.criterionSeq = SeqLoss(voc_size=self.train_set.vocab.size,
                                pad=self.train_set.pad,
                                end=self.train_set.eos,
                                unk=self.train_set.unk)
    self.criterionCls = nn.BCELoss()
    self.criterionBack = BackLoss(reduce=False)
    self.criterionRL = RewardCriterion()

    # Train.
    epoch = 0
    while True:
        self.train_epoch(epoch_idx=epoch)
        epoch += 1
        if self.iter_num >= config.num_iters:
            break
    self.test()
def __init__(self, hid_dim, latent_dim, enc_layers, dec_layers, dropout, enc_bi, dec_max_len,
             beam_size, WEAtt_type, encoder_emb, decoder_emb, pad_id):
    super(WAE_COUPLE, self).__init__()

    assert encoder_emb.num_embeddings == decoder_emb.num_embeddings
    assert encoder_emb.embedding_dim == decoder_emb.embedding_dim
    self.voc_size = encoder_emb.num_embeddings
    self.emb_dim = encoder_emb.embedding_dim
    self.hid_dim = hid_dim
    self.enc_layers = enc_layers
    self.dec_layers = dec_layers
    self.dropout = dropout
    self.enc_bi = enc_bi
    self.n_dir = 2 if self.enc_bi else 1
    self.dec_max_len = dec_max_len
    self.beam_size = beam_size
    self.WEAtt_type = WEAtt_type
    self.latent_dim = latent_dim

    self.Encoder = Encoder(emb_dim=self.emb_dim,
                           hid_dim=self.hid_dim,
                           n_layer=self.enc_layers,
                           dropout=self.dropout,
                           bi=self.enc_bi,
                           embedding=encoder_emb)
    self.PriorGaussian = torch.distributions.Normal(
        gpu_wrapper(torch.zeros(self.latent_dim)),
        gpu_wrapper(torch.ones(self.latent_dim)))
    self.PosteriorGaussian = Gaussian(in_dim=self.hid_dim * self.n_dir * self.enc_layers,
                                      out_dim=self.latent_dim)
    self.PosteriorGaussianCouple = Gaussian(in_dim=self.hid_dim * self.n_dir * self.enc_layers,
                                            out_dim=self.latent_dim)
    self.Decoder = Decoder(voc_size=self.voc_size,
                           latent_dim=self.latent_dim,
                           emb_dim=self.emb_dim,
                           hid_dim=self.hid_dim * self.n_dir,
                           n_layer=self.dec_layers,
                           dropout=self.dropout,
                           max_len=self.dec_max_len,
                           beam_size=self.beam_size,
                           WEAtt_type=self.WEAtt_type,
                           embedding=decoder_emb)
    self.DecoderCouple = Decoder(voc_size=self.voc_size,
                                 latent_dim=self.latent_dim,
                                 emb_dim=self.emb_dim,
                                 hid_dim=self.hid_dim * self.n_dir,
                                 n_layer=self.dec_layers,
                                 dropout=self.dropout,
                                 max_len=self.dec_max_len,
                                 beam_size=self.beam_size,
                                 WEAtt_type=self.WEAtt_type,
                                 embedding=decoder_emb)
    self.criterionSeq = SeqLoss(voc_size=self.voc_size, pad=pad_id, end=None, unk=None)
    self.toInit = nn.Sequential(nn.Linear(self.latent_dim, self.emb_dim),
                                nn.ReLU(),
                                nn.Linear(self.emb_dim, self.emb_dim))
    self.toInitCouple = nn.Sequential(nn.Linear(self.latent_dim, self.emb_dim),
                                      nn.ReLU(),
                                      nn.Linear(self.emb_dim, self.emb_dim))