Example #1
    def __init__(self, corpus, config):
        super(LIRL, self).__init__(config)
        self.use_gpu = config.use_gpu
        self.config = config
        self.vocab = corpus.vocab
        self.vocab_dict = corpus.vocab_dict
        self.vocab_size = len(self.vocab)
        self.action_number = config.action_num
        self.bos_id = self.vocab_dict[BOS]
        self.eos_id = self.vocab_dict[EOS]
        self.pad_id = self.vocab_dict[PAD]
        self.k_size = config.k_size
        self.y_size = config.y_size
        self.simple_posterior = config.simple_posterior
        self.contextual_posterior = config.contextual_posterior

        # the hierarchical Context Encoder is pre-trained
        self.ContextEncoder = ContEncoder(corpus, config)
        self.c2z = PolicyNN(config)
        self.z_embedding = nn.Linear(self.action_number,
                                     config.dec_cell_size,
                                     bias=False)
        self.gumbel_connector = nn_lib.GumbelConnector(self.use_gpu)
        # connector
        self.c_init_connector = nn_lib.LinearConnector(
            self.action_number, config.dec_cell_size, config.rnn_cell == 'gru')
        # decoder
        self.embedding = None
        self.decoder = DecoderRNN(self.vocab_size,
                                  config.max_dec_len,
                                  config.embed_size,
                                  config.dec_cell_size,
                                  self.bos_id,
                                  self.eos_id,
                                  n_layers=1,
                                  rnn_cell=config.rnn_cell,
                                  input_dropout_p=config.dropout,
                                  dropout_p=config.dropout,
                                  use_attention=config.use_attn,
                                  attn_size=config.dec_cell_size,
                                  attn_mode=config.attn_type,
                                  use_gpu=config.use_gpu,
                                  embedding=self.embedding)

        self.nll = NLLEntropy(self.pad_id, config)
        self.cat_kl_loss = CatKLLoss()
        self.entropy_loss = Entropy()
        self.log_uniform_y = Variable(torch.log(torch.ones(1) / config.k_size))
        self.eye = Variable(torch.eye(self.config.y_size).unsqueeze(0))
        self.beta = self.config.beta if hasattr(self.config, 'beta') else 0.0
        if self.use_gpu:
            self.log_uniform_y = self.log_uniform_y.cuda()
            self.eye = self.eye.cuda()
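The LIRL example above relies on nn_lib.GumbelConnector to sample a relaxed discrete latent action. The connector's implementation is not part of this excerpt, so the following is only a sketch of the standard Gumbel-Softmax trick such a connector typically wraps; gumbel_softmax_sample is an illustrative name, not a function from the repository.

    import torch
    import torch.nn.functional as F

    def gumbel_softmax_sample(logits, temperature=1.0, hard=False):
        # logits: (batch, k) unnormalized log-probabilities over k discrete actions.
        # Adding Gumbel noise and applying a tempered softmax gives a
        # differentiable approximation of a one-hot categorical sample.
        gumbel_noise = -torch.log(-torch.log(torch.rand_like(logits) + 1e-20) + 1e-20)
        y = F.softmax((logits + gumbel_noise) / temperature, dim=-1)
        if hard:
            # Straight-through estimator: one-hot in the forward pass,
            # soft gradients in the backward pass.
            y_hard = torch.zeros_like(y).scatter_(-1, y.argmax(dim=-1, keepdim=True), 1.0)
            y = (y_hard - y).detach() + y
        return y

    # Usage: sample a relaxed latent from policy logits (batch of 4, k_size = 10).
    z = gumbel_softmax_sample(torch.randn(4, 10), temperature=0.8)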
Example #2
    def __init__(self, corpus, config):
        super(StED, self).__init__(config)
        self.vocab = corpus.vocab
        self.rev_vocab = corpus.rev_vocab
        self.vocab_size = len(self.vocab)
        self.go_id = self.rev_vocab[BOS]
        self.eos_id = self.rev_vocab[EOS]
        if not hasattr(config, "freeze_step"):
            config.freeze_step = 6000

        # build model here
        # word embeddings
        self.x_embedding = nn.Embedding(self.vocab_size, config.embed_size)

        # latent action learned
        self.x_encoder = EncoderRNN(config.embed_size, config.dec_cell_size,
                                    dropout_p=config.dropout,
                                    rnn_cell=config.rnn_cell,
                                    variable_lengths=False)

        self.q_y = nn.Linear(config.dec_cell_size, config.y_size * config.k)
        self.x_init_connector = nn_lib.LinearConnector(config.y_size * config.k,
                                                       config.dec_cell_size,
                                                       config.rnn_cell == 'lstm')
        # decoder
        self.prev_decoder = DecoderRNN(self.vocab_size, config.max_dec_len,
                                        config.embed_size, config.dec_cell_size,
                                        self.go_id, self.eos_id,
                                        n_layers=1, rnn_cell=config.rnn_cell,
                                        input_dropout_p=config.dropout,
                                        dropout_p=config.dropout,
                                        use_attention=False,
                                        use_gpu=config.use_gpu,
                                        embedding=self.x_embedding)

        self.next_decoder = DecoderRNN(self.vocab_size, config.max_dec_len,
                                        config.embed_size, config.dec_cell_size,
                                        self.go_id, self.eos_id,
                                        n_layers=1, rnn_cell=config.rnn_cell,
                                        input_dropout_p=config.dropout,
                                        dropout_p=config.dropout,
                                        use_attention=False,
                                        use_gpu=config.use_gpu,
                                        embedding=self.x_embedding)


        # Encoder-Decoder STARTS here
        self.embedding = nn.Embedding(self.vocab_size, config.embed_size,
                                      padding_idx=self.rev_vocab[PAD])

        self.utt_encoder = RnnUttEncoder(config.utt_cell_size, config.dropout,
                                         bidirection=False,
                                         #  bidirection=True in the original code
                                         use_attn=config.utt_type == 'attn_rnn',
                                         vocab_size=self.vocab_size,
                                         embedding=self.embedding)

        self.ctx_encoder = EncoderRNN(self.utt_encoder.output_size,
                                      config.ctx_cell_size,
                                      0.0,
                                      config.dropout,
                                      config.num_layer,
                                      config.rnn_cell,
                                      variable_lengths=config.fix_batch)
        # FNN to get Y
        self.p_fc1 = nn.Linear(config.ctx_cell_size, config.ctx_cell_size)
        self.p_y = nn.Linear(config.ctx_cell_size, config.y_size * config.k)

        # connector
        self.c_init_connector = nn_lib.LinearConnector(config.y_size * config.k,
                                                       config.dec_cell_size,
                                                       config.rnn_cell == 'lstm')
        # decoder
        self.decoder = DecoderRNN(self.vocab_size, config.max_dec_len,
                                  config.embed_size, config.dec_cell_size,
                                  self.go_id, self.eos_id,
                                  n_layers=1, rnn_cell=config.rnn_cell,
                                  input_dropout_p=config.dropout,
                                  dropout_p=config.dropout,
                                  use_attention=config.use_attn,
                                  attn_size=config.dec_cell_size,
                                  attn_mode=config.attn_type,
                                  use_gpu=config.use_gpu,
                                  embedding=self.embedding)

        # force G(z, c) to depend on z
        if config.use_attribute:
            self.attribute_loss = criterions.NLLEntropy(-100, config)

        self.cat_connector = nn_lib.GumbelConnector(config.use_gpu)
        self.greedy_cat_connector = nn_lib.GreedyConnector()
        self.nll_loss = criterions.NLLEntropy(self.rev_vocab[PAD], self.config)
        self.cat_kl_loss = criterions.CatKLLoss()
        self.log_uniform_y = Variable(torch.log(torch.ones(1) / config.k))
        self.entropy_loss = criterions.Entropy()

        if self.use_gpu:
            self.log_uniform_y = self.log_uniform_y.cuda()
        self.kl_w = 0.0
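Both this example and the previous one pair a log-uniform prior (log_uniform_y) with a CatKLLoss. CatKLLoss itself is not reproduced here; the sketch below only shows the quantity such a loss is usually expected to compute, KL(q(y|x) || Uniform(k)), and categorical_kl_to_uniform is an assumed helper name.

    import torch
    import torch.nn.functional as F

    def categorical_kl_to_uniform(qy_logits, k):
        # qy_logits: (batch, k) unnormalized logits, e.g. the output of self.q_y.
        log_qy = F.log_softmax(qy_logits, dim=-1)       # log q(y|x)
        log_uniform = torch.log(torch.ones(1) / k)      # log(1/k), as in log_uniform_y
        kl = torch.sum(log_qy.exp() * (log_qy - log_uniform), dim=-1)
        return kl.mean()                                # average over the batch

    print(categorical_kl_to_uniform(torch.randn(4, 20), k=20))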
Example #3
    def __init__(self, corpus, config):
        super(DiVST, self).__init__(config)
        self.vocab = corpus.vocab
        self.rev_vocab = corpus.rev_vocab
        self.vocab_size = len(self.vocab)
        self.embed_size = config.embed_size
        self.max_utt_len = config.max_utt_len
        self.go_id = self.rev_vocab[BOS]
        self.eos_id = self.rev_vocab[EOS]
        self.num_layer = config.num_layer
        self.dropout = config.dropout
        self.enc_cell_size = config.enc_cell_size
        self.dec_cell_size = config.dec_cell_size
        self.rnn_cell = config.rnn_cell
        self.max_dec_len = config.max_dec_len
        self.use_attn = config.use_attn
        self.beam_size = config.beam_size
        self.utt_type = config.utt_type
        self.bi_enc_cell = config.bi_enc_cell
        self.attn_type = config.attn_type
        self.enc_out_size = self.enc_cell_size*2 if self.bi_enc_cell else self.enc_cell_size


        # build model here
        self.embedding = nn.Embedding(self.vocab_size, self.embed_size,
                                      padding_idx=self.rev_vocab[PAD])

        self.x_encoder = EncoderRNN(self.embed_size, self.enc_cell_size,
                                    bidirection=self.bi_enc_cell,
                                    dropout_p=self.dropout,
                                    rnn_cell=self.rnn_cell,
                                    variable_lengths=False)

        self.q_y = nn.Linear(self.enc_out_size, config.y_size * config.k)
        self.cat_connector = nn_lib.GumbelConnector()
        self.dec_init_connector = nn_lib.LinearConnector(config.y_size * config.k,
                                                         self.dec_cell_size,
                                                         self.rnn_cell == 'lstm')


        self.prev_decoder = DecoderRNN(self.vocab_size, self.max_dec_len,
                                       self.embed_size, self.dec_cell_size,
                                       self.go_id, self.eos_id,
                                       n_layers=1, rnn_cell=self.rnn_cell,
                                       input_dropout_p=self.dropout,
                                       dropout_p=self.dropout,
                                       use_attention=self.use_attn,
                                       attn_size=self.enc_cell_size,
                                       attn_mode=self.attn_type,
                                       use_gpu=self.use_gpu,
                                       embedding=self.embedding)

        self.next_decoder = DecoderRNN(self.vocab_size, self.max_dec_len,
                                       self.embed_size, self.dec_cell_size,
                                       self.go_id, self.eos_id,
                                       n_layers=1, rnn_cell=self.rnn_cell,
                                       input_dropout_p=self.dropout,
                                       dropout_p=self.dropout,
                                       use_attention=self.use_attn,
                                       attn_size=self.enc_cell_size,
                                       attn_mode=self.attn_type,
                                       use_gpu=self.use_gpu,
                                       embedding=self.embedding)

        self.nll_loss = criterions.NLLEntropy(self.rev_vocab[PAD], self.config)
        self.cat_kl_loss = criterions.CatKLLoss()
        self.cross_ent_loss = criterions.CrossEntropyLoss()
        self.entropy_loss = criterions.Entropy()
        self.log_uniform_y = Variable(torch.log(torch.ones(1) / config.k))
        if self.use_gpu:
            self.log_uniform_y = self.log_uniform_y.cuda()
        self.kl_w = 1.0
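The Variable(...).cuda() pattern used for log_uniform_y in these examples predates PyTorch 0.4, where Variable became a no-op wrapper. Below is a sketch of the same constant stored as a registered buffer so that .to(device) or .cuda() on the module moves it automatically, much as the DirVAE example further down already does for its Dirichlet prior; LogUniformPrior is an illustrative name only.

    import torch
    import torch.nn as nn

    class LogUniformPrior(nn.Module):
        def __init__(self, k):
            super().__init__()
            # Registered buffers are saved with the state dict and follow the
            # module across devices, unlike a bare tensor attribute.
            self.register_buffer('log_uniform_y', torch.log(torch.ones(1) / k))

    prior = LogUniformPrior(k=20)
    print(prior.log_uniform_y)   # tensor([-2.9957])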
Example #4
    def __init__(self, corpus, config):
        super(DirVAE, self).__init__(config)
        self.vocab = corpus.vocab
        self.rev_vocab = corpus.rev_vocab
        self.vocab_size = len(self.vocab)
        self.embed_size = config.embed_size
        self.max_utt_len = config.max_utt_len
        self.go_id = self.rev_vocab[BOS]
        self.eos_id = self.rev_vocab[EOS]
        self.num_layer = config.num_layer
        self.dropout = config.dropout
        self.enc_cell_size = config.enc_cell_size
        self.dec_cell_size = config.dec_cell_size
        self.rnn_cell = config.rnn_cell
        self.max_dec_len = config.max_dec_len
        self.use_attn = config.use_attn
        self.beam_size = config.beam_size
        self.utt_type = config.utt_type
        self.bi_enc_cell = config.bi_enc_cell
        self.attn_type = config.attn_type
        self.enc_out_size = self.enc_cell_size * 2 if self.bi_enc_cell else self.enc_cell_size
        self.full_kl_step = 4200.0

        # build model here
        self.embedding = nn.Embedding(self.vocab_size,
                                      self.embed_size,
                                      padding_idx=self.rev_vocab[PAD])

        self.x_encoder = EncoderRNN(self.embed_size,
                                    self.enc_cell_size,
                                    dropout_p=self.dropout,
                                    rnn_cell=self.rnn_cell,
                                    variable_lengths=self.config.fix_batch,
                                    bidirection=self.bi_enc_cell)

        # Dirichlet Topic Model prior
        self.h_dim = config.latent_size
        self.a = 1. * np.ones((1, self.h_dim)).astype(np.float32)
        prior_mean = torch.from_numpy(
            (np.log(self.a).T - np.mean(np.log(self.a), 1)).T)
        prior_var = torch.from_numpy(
            (((1.0 / self.a) * (1 - (2.0 / self.h_dim))).T +
             (1.0 / (self.h_dim * self.h_dim)) * np.sum(1.0 / self.a, 1)).T)
        prior_logvar = prior_var.log()

        self.register_buffer('prior_mean', prior_mean)
        self.register_buffer('prior_var', prior_var)
        self.register_buffer('prior_logvar', prior_logvar)

        self.logvar_fc = nn.Sequential(
            nn.Linear(self.enc_cell_size,
                      np.maximum(config.latent_size * 2, 100)), nn.Tanh(),
            nn.Linear(np.maximum(config.latent_size * 2, 100),
                      config.latent_size))
        self.mean_fc = nn.Sequential(
            nn.Linear(self.enc_cell_size,
                      np.maximum(config.latent_size * 2, 100)), nn.Tanh(),
            nn.Linear(np.maximum(config.latent_size * 2, 100),
                      config.latent_size))
        self.mean_bn = nn.BatchNorm1d(self.h_dim)  # bn for mean
        self.logvar_bn = nn.BatchNorm1d(self.h_dim)  # bn for logvar
        self.decoder_bn = nn.BatchNorm1d(self.vocab_size)

        self.logvar_bn.weight.requires_grad = False
        self.mean_bn.weight.requires_grad = False
        self.decoder_bn.weight.requires_grad = False

        self.logvar_bn.weight.fill_(1)
        self.mean_bn.weight.fill_(1)
        self.decoder_bn.weight.fill_(1)
        # self.q_y = nn.Linear(self.enc_out_size, self.h_dim)
        #self.cat_connector = nn_lib.GumbelConnector()

        # Prior for the Generation
        # self.z_mean  = nn.Sequential(
        #                 nn.Linear(self.enc_cell_size, np.maximum(config.latent_size * 2, 100)),
        #                 nn.Tanh(),
        #                 nn.Linear(np.maximum(config.latent_size * 2, 100), config.latent_size)
        #                 )

        # self.z_logvar = self.z_mean  = nn.Sequential(
        #                 nn.Linear(self.enc_cell_size, np.maximum(config.latent_size * 2, 100)),
        #                 nn.Tanh(),
        #                 nn.Linear(np.maximum(config.latent_size * 2, 100), config.latent_size)
        #                 )

        self.dec_init_connector = nn_lib.LinearConnector(
            config.latent_size,  #+ self.h_dim,
            self.dec_cell_size,
            self.rnn_cell == 'lstm',
            has_bias=False)

        self.decoder = DecoderRNN(self.vocab_size,
                                  self.max_dec_len,
                                  self.embed_size,
                                  self.dec_cell_size,
                                  self.go_id,
                                  self.eos_id,
                                  n_layers=1,
                                  rnn_cell=self.rnn_cell,
                                  input_dropout_p=self.dropout,
                                  dropout_p=self.dropout,
                                  use_attention=self.use_attn,
                                  attn_size=self.enc_cell_size,
                                  attn_mode=self.attn_type,
                                  use_gpu=self.use_gpu,
                                  embedding=self.embedding)

        self.nll_loss = criterions.NLLEntropy(self.rev_vocab[PAD], self.config)
        #self.cat_kl_loss = criterions.CatKLLoss()
        #self.cross_ent_loss = criterions.CrossEntropyoss()
        self.entropy_loss = criterions.Entropy()
        # self.log_py = nn.Parameter(torch.log(torch.ones(self.config.y_size,
        #                                                 self.config.k)/config.k),
        #                            requires_grad=True)
        # self.register_parameter('log_py', self.log_py)

        # self.log_uniform_y = Variable(torch.log(torch.ones(1) / config.k))
        # if self.use_gpu:
        #     self.log_uniform_y = self.log_uniform_y.cuda()

        # BOW loss
        self.bow_project = nn.Sequential(
            #nn.Linear(self.h_dim + config.latent_size, self.vocab_size),
            nn.Linear(config.latent_size, self.vocab_size),
            self.decoder_bn)

        self.kl_w = 0.0
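The prior_mean / prior_var / prior_logvar buffers registered in this example follow the usual Laplace (logistic-normal) approximation of a Dirichlet prior. The loss that consumes them is outside this excerpt; the sketch below shows the standard Gaussian KL term such buffers are typically plugged into, under an assumed function name.

    import torch

    def gaussian_kl_to_dirichlet_prior(post_mean, post_logvar,
                                       prior_mean, prior_var, prior_logvar):
        # KL( N(post_mean, exp(post_logvar)) || N(prior_mean, prior_var) ),
        # summed over the latent dimension and averaged over the batch.
        post_var = post_logvar.exp()
        kl = 0.5 * torch.sum(
            post_var / prior_var
            + (prior_mean - post_mean).pow(2) / prior_var
            - 1.0
            + prior_logvar
            - post_logvar,
            dim=-1)
        return kl.mean()

    # e.g. total loss: nll + self.kl_w * gaussian_kl_to_dirichlet_prior(...)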