Example #1
    def __init__(self, mem_slots, num_heads, head_size, embedding_dim, hidden_dim, vocab_size, max_seq_len, padding_idx,
                 temperature, eta, gpu=False):
        super(RelbarGAN_G, self).__init__(embedding_dim, hidden_dim, vocab_size, max_seq_len, padding_idx, gpu)
        self.name = 'relbargan'
        if gpu:
            self.temperature = torch.tensor(temperature, dtype=torch.float, device='cuda', requires_grad=True)
            self.eta = torch.tensor(eta, dtype=torch.float, device='cuda', requires_grad=True)
        else:
            self.temperature = torch.tensor(temperature, dtype=torch.float, requires_grad=True)
            self.eta = torch.tensor(eta, dtype=torch.float, requires_grad=True)

        # RMC
        self.embeddings = nn.Embedding(vocab_size, embedding_dim, padding_idx=padding_idx)
        self.hidden_dim = mem_slots * num_heads * head_size
        self.lstm = RelationalMemory(mem_slots=mem_slots, head_size=head_size, input_size=embedding_dim,
                                     num_heads=num_heads, return_all_outputs=True)
        self.lstm2out = nn.Linear(self.hidden_dim, vocab_size)

        # LSTM
        # self.hidden_dim = 512
        # self.lstm = nn.LSTM(embedding_dim, self.hidden_dim, batch_first=True)
        # self.lstm2out = nn.Linear(self.hidden_dim, vocab_size)

        # θ parameter in the REBAR equation: the softmax probabilities of the generator output.
        self.theta = None

        self.init_params()
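
Example #1 registers temperature and eta as leaf tensors with requires_grad=True so that gradients computed elsewhere can be written into their .grad fields and applied by an optimizer (see set_variance_loss_gradients in Example #5). A minimal, self-contained sketch of that update pattern, using made-up gradient values rather than a real variance loss:

import torch

# Hypothetical stand-ins for the generator's control-variate parameters.
temperature = torch.tensor(1.0, requires_grad=True)
eta = torch.tensor(0.5, requires_grad=True)
optimizer = torch.optim.Adam([temperature, eta], lr=1e-3)

# Pretend these detached scalars came from a variance-loss backward pass.
temperature_grad = torch.tensor(0.2)
eta_grad = torch.tensor(-0.1)

optimizer.zero_grad()
temperature.grad = temperature_grad  # manual assignment, as in set_variance_loss_gradients
eta.grad = eta_grad
optimizer.step()                     # updates temperature and eta in place
print(temperature.item(), eta.item())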
Example #2
    def __init__(self,
                 mem_slots,
                 num_heads,
                 head_size,
                 embedding_dim,
                 hidden_dim,
                 vocab_size,
                 max_seq_len,
                 padding_idx,
                 gpu=False):
        super(RelGAN_G, self).__init__(embedding_dim, hidden_dim, vocab_size,
                                       max_seq_len, padding_idx, gpu)
        self.name = 'relgan'

        self.temperature = 1.0  # init value is 1.0

        # RMC
        self.embeddings = nn.Embedding(vocab_size,
                                       embedding_dim,
                                       padding_idx=padding_idx)
        self.hidden_dim = mem_slots * num_heads * head_size
        self.lstm = RelationalMemory(mem_slots=mem_slots,
                                     head_size=head_size,
                                     input_size=embedding_dim,
                                     num_heads=num_heads,
                                     return_all_outputs=True)
        self.lstm2out = nn.Linear(self.hidden_dim, vocab_size)

        # LSTM
        # self.hidden_dim = 512
        # self.lstm = nn.LSTM(embedding_dim, self.hidden_dim, batch_first=True)
        # self.lstm2out = nn.Linear(self.hidden_dim, vocab_size)

        self.init_params()
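
Note that the hidden_dim argument is ignored in this RMC configuration: the relational memory is flattened to mem_slots * num_heads * head_size features, which is the in_features that the lstm2out projection has to match. A quick sketch of that shape arithmetic with hypothetical sizes:

import torch.nn as nn

mem_slots, num_heads, head_size = 1, 2, 256   # hypothetical RMC sizes
vocab_size = 5000                             # hypothetical vocabulary size

rmc_out_dim = mem_slots * num_heads * head_size   # 512 flattened memory features per step
lstm2out = nn.Linear(rmc_out_dim, vocab_size)     # projection to vocabulary logits
print(lstm2out)  # Linear(in_features=512, out_features=5000, bias=True)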
Example #3
    def __init__(self,
                 k_label,
                 mem_slots,
                 num_heads,
                 head_size,
                 embedding_dim,
                 hidden_dim,
                 vocab_size,
                 max_seq_len,
                 padding_idx,
                 gpu=False):
        super(CatGAN_G, self).__init__(embedding_dim, hidden_dim, vocab_size,
                                       max_seq_len, padding_idx, gpu)
        self.name = 'catgan'

        self.k_label = k_label
        self.temperature = nn.Parameter(
            torch.Tensor([1.0]), requires_grad=False)  # init value is 1.0

        # Category matrix
        # self.cat_mat = nn.Parameter(torch.rand(self.k_label, embedding_dim), requires_grad=True)
        self.cat_mat = nn.Parameter(torch.eye(k_label), requires_grad=False)

        self.embeddings = nn.Embedding(vocab_size,
                                       embedding_dim,
                                       padding_idx=padding_idx)
        if cfg.model_type == 'LSTM':
            # LSTM
            self.hidden_dim = hidden_dim
            self.lstm = nn.LSTM(k_label + embedding_dim,
                                self.hidden_dim,
                                batch_first=True)
            self.lstm2out = nn.Linear(self.hidden_dim, vocab_size)
        else:
            # RMC
            self.hidden_dim = mem_slots * num_heads * head_size
            self.lstm = RelationalMemory(mem_slots=mem_slots,
                                         head_size=head_size,
                                         input_size=k_label + embedding_dim,
                                         num_heads=num_heads,
                                         return_all_outputs=True)
            self.lstm2out = nn.Linear(self.hidden_dim, vocab_size)
        self.init_params()
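
With cat_mat fixed to the identity matrix, the category conditioning used in the forward pass (shown in full in Example #6) reduces to concatenating a one-hot label vector onto every token embedding, which is why the recurrent input size above is k_label + embedding_dim. A self-contained sketch of that shape arithmetic with hypothetical sizes:

import torch
import torch.nn.functional as F

batch_size, seq_len, embedding_dim, k_label = 4, 7, 32, 2   # hypothetical sizes
emb = torch.randn(batch_size, seq_len, embedding_dim)       # token embeddings
label = torch.randint(0, k_label, (batch_size,))            # one label index per sample
cat_mat = torch.eye(k_label)                                 # fixed category matrix

label_onehot = F.one_hot(label, k_label).float()                            # batch_size * k_label
label_onehot_ex = label_onehot.unsqueeze(1).expand(-1, seq_len, -1)         # batch_size * seq_len * k_label
label_vec = torch.bmm(label_onehot_ex, cat_mat.expand(batch_size, -1, -1))  # selects rows of cat_mat
cond_emb = torch.cat((emb, label_vec), dim=-1)
print(cond_emb.shape)  # torch.Size([4, 7, 34]) == batch_size * seq_len * (embedding_dim + k_label)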
Example #4
class RelGAN_G(LSTMGenerator):
    def __init__(self,
                 mem_slots,
                 num_heads,
                 head_size,
                 embedding_dim,
                 hidden_dim,
                 vocab_size,
                 max_seq_len,
                 padding_idx,
                 gpu=False):
        super(RelGAN_G, self).__init__(embedding_dim, hidden_dim, vocab_size,
                                       max_seq_len, padding_idx, gpu)
        self.name = 'relgan'

        self.temperature = 1.0  # init value is 1.0

        self.embeddings = nn.Embedding(vocab_size,
                                       embedding_dim,
                                       padding_idx=padding_idx)
        if cfg.model_type == 'LSTM':
            # LSTM
            self.hidden_dim = hidden_dim
            self.lstm = nn.LSTM(embedding_dim,
                                self.hidden_dim,
                                batch_first=True)
            self.lstm2out = nn.Linear(self.hidden_dim, vocab_size)
        else:
            # RMC
            self.hidden_dim = mem_slots * num_heads * head_size
            self.lstm = RelationalMemory(mem_slots=mem_slots,
                                         head_size=head_size,
                                         input_size=embedding_dim,
                                         num_heads=num_heads,
                                         return_all_outputs=True)
            self.lstm2out = nn.Linear(self.hidden_dim, vocab_size)

        self.init_params()

    def init_hidden(self, batch_size=cfg.batch_size):
        if cfg.model_type == 'LSTM':
            h = torch.zeros(1, batch_size, self.hidden_dim)
            c = torch.zeros(1, batch_size, self.hidden_dim)

            if self.gpu:
                return h.cuda(), c.cuda()
            else:
                return h, c
        else:
            """init RMC memory"""
            memory = self.lstm.initial_state(batch_size)
            memory = self.lstm.repackage_hidden(
                memory)  # detach memory first
            return memory.cuda() if self.gpu else memory

    def step(self, inp, hidden):
        """
        RelGAN step forward: run one token through the recurrent core and Gumbel-perturb the output logits
        :param inp: [batch_size], current input tokens
        :param hidden: RMC memory (or LSTM hidden state)
        :return: pred, hidden, next_token, next_token_onehot, next_o
            - pred: batch_size * vocab_size, used for the adversarial training backward pass
            - hidden: next hidden state
            - next_token: [batch_size], next token of each sequence
            - next_token_onehot: batch_size * vocab_size, not used yet
            - next_o: batch_size * vocab_size, not used yet
        """
        emb = self.embeddings(inp).unsqueeze(1)
        out, hidden = self.lstm(emb, hidden)
        gumbel_t = self.add_gumbel(self.lstm2out(out.squeeze(1)))
        next_token = torch.argmax(gumbel_t, dim=1).detach()
        # next_token_onehot = F.one_hot(next_token, cfg.vocab_size).float()  # not used yet
        next_token_onehot = None

        pred = F.softmax(gumbel_t * self.temperature,
                         dim=-1)  # batch_size * vocab_size
        # next_o = torch.sum(next_token_onehot * pred, dim=1)  # not used yet
        next_o = None

        return pred, hidden, next_token, next_token_onehot, next_o

    def sample(self,
               num_samples,
               batch_size,
               one_hot=False,
               start_letter=cfg.start_letter):
        """
        Sample sequences from the RelGAN generator
        - one_hot: if True, return the Gumbel-softmax predictions instead (used for adversarial training)
        :return:
            - all_preds: batch_size * seq_len * vocab_size, only returned for a single batch
            - samples: all sampled token sequences
        """
        all_preds = None
        num_batch = num_samples // batch_size + 1 if num_samples != batch_size else 1
        samples = torch.zeros(num_batch * batch_size, self.max_seq_len).long()
        if one_hot:
            all_preds = torch.zeros(batch_size, self.max_seq_len,
                                    self.vocab_size)
            if self.gpu:
                all_preds = all_preds.cuda()

        for b in range(num_batch):
            hidden = self.init_hidden(batch_size)
            inp = torch.LongTensor([start_letter] * batch_size)
            if self.gpu:
                inp = inp.cuda()

            for i in range(self.max_seq_len):
                pred, hidden, next_token, _, _ = self.step(inp, hidden)
                samples[b * batch_size:(b + 1) * batch_size, i] = next_token
                if one_hot:
                    all_preds[:, i] = pred
                inp = next_token
        samples = samples[:num_samples]  # num_samples * seq_len

        if one_hot:
            return all_preds  # batch_size * seq_len * vocab_size
        return samples

    @staticmethod
    def add_gumbel(o_t, eps=1e-10, gpu=cfg.CUDA):
        """Add o_t by a vector sampled from Gumbel(0,1)"""
        u = torch.zeros(o_t.size())
        if gpu:
            u = u.cuda()

        u.uniform_(0, 1)
        g_t = -torch.log(-torch.log(u + eps) + eps)
        gumbel_t = o_t + g_t
        return gumbel_t
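
add_gumbel together with the argmax in step is the Gumbel-Max trick: perturbing logits with independent Gumbel(0, 1) noise and taking the argmax draws an exact sample from the corresponding softmax distribution, while the temperature-scaled softmax of the same perturbed logits gives the differentiable pred. A small standalone sanity check of the sampling part (not part of the class above):

import torch
import torch.nn.functional as F

torch.manual_seed(0)
logits = torch.tensor([2.0, 0.5, -1.0])      # toy unnormalised scores
probs = F.softmax(logits, dim=-1)            # target categorical distribution

n, eps = 100000, 1e-10
u = torch.rand(n, 3)
g = -torch.log(-torch.log(u + eps) + eps)    # Gumbel(0, 1) noise, exactly as in add_gumbel
samples = torch.argmax(logits + g, dim=1)    # Gumbel-Max sampling

empirical = torch.bincount(samples, minlength=3).float() / n
print(probs)      # ~ tensor([0.786, 0.175, 0.039])
print(empirical)  # close to the probabilities above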
Example #5
class RelbarGAN_G(LSTMGenerator):
    def __init__(self, mem_slots, num_heads, head_size, embedding_dim, hidden_dim, vocab_size, max_seq_len, padding_idx,
                 temperature, eta, gpu=False):
        super(RelbarGAN_G, self).__init__(embedding_dim, hidden_dim, vocab_size, max_seq_len, padding_idx, gpu)
        self.name = 'relbargan'
        if gpu:
            self.temperature = torch.tensor(temperature, dtype=torch.float, device='cuda', requires_grad=True)
            self.eta = torch.tensor(eta, dtype=torch.float, device='cuda', requires_grad=True)
        else:
            self.temperature = torch.tensor(temperature, dtype=torch.float, requires_grad=True)
            self.eta = torch.tensor(eta, dtype=torch.float, requires_grad=True)

        # RMC
        self.embeddings = nn.Embedding(vocab_size, embedding_dim, padding_idx=padding_idx)
        self.hidden_dim = mem_slots * num_heads * head_size
        self.lstm = RelationalMemory(mem_slots=mem_slots, head_size=head_size, input_size=embedding_dim,
                                     num_heads=num_heads, return_all_outputs=True)
        self.lstm2out = nn.Linear(self.hidden_dim, vocab_size)

        # LSTM
        # self.hidden_dim = 512
        # self.lstm = nn.LSTM(embedding_dim, self.hidden_dim, batch_first=True)
        # self.lstm2out = nn.Linear(self.hidden_dim, vocab_size)

        # θ parameter in the REBAR equation: the softmax probabilities of the generator output.
        self.theta = None

        self.init_params()

    def set_variance_loss_gradients(self, temperature_grad, eta_gradient):
        """
        Sets the variance-loss gradients on the control-variate parameters.

        :param temperature_grad: gradient of the variance loss w.r.t. temperature (must be detached). Shape: scalar
        :param eta_gradient: gradient of the variance loss w.r.t. eta (must be detached). Shape: scalar
        """
        assert self.temperature.shape == temperature_grad.shape, 'temperature_grad does not match the shape of self.temperature'
        assert self.eta.shape == eta_gradient.shape, 'eta_gradient does not match the shape of self.eta'
        self.temperature.grad = temperature_grad
        self.eta.grad = eta_gradient


    def sample_theta(self, batch_size, start_letter=cfg.start_letter):
        """
        Samples the generator and collects θ (softmax probabilities) together with the Gumbel-perturbed logits z.

        :param start_letter: index of start_token
        :return θ: batch_size * max_seq_length * vocab_size
                z: batch_size * max_seq_length * vocab_size
        """
        self.theta = torch.zeros(batch_size, self.max_seq_len, self.vocab_size, dtype=torch.float)
        gumbel = torch.zeros(batch_size, self.max_seq_len, self.vocab_size, dtype=torch.float)

        hidden = self.init_hidden(batch_size)
        inp = torch.LongTensor([start_letter] * batch_size)
        if self.gpu:
            inp = inp.cuda()
            self.theta = self.theta.cuda()
            gumbel = gumbel.cuda()

        for i in range(self.max_seq_len):
            emb = self.embeddings(inp).unsqueeze(1)  # batch_size * 1 * embedding_dim
            out, hidden = self.lstm(emb, hidden)
            out = self.lstm2out(out.squeeze(1))  # batch_size * vocab_size
            out = F.softmax(out, dim=-1)  # batch_size * vocab_size
            gumbel_t, gumbel_slice = self.add_gumbel(out)  # batch_size * vocab_size
            next_token = torch.argmax(gumbel_t, dim=1).detach()  # batch_size

            self.theta[:, i, :] = out
            gumbel[:, i, :] = gumbel_slice
            inp = next_token.view(-1)

        eps = 1e-10
        z = torch.log(self.theta + eps) + gumbel
        return self.theta, z


    def computeRebarLoss(self, estimated_gradient):
        """
        Computes the loss based on the estimated REBAR gradient

        :param estimated_gradient: estimated gradient of the loss w.r.t. theta (must be detached). Shape: batch_size * max_seq_len * vocab_size
        :return loss: REBAR loss
        """
        assert self.theta is not None and \
               self.theta.shape == estimated_gradient.shape, 'estimated_gradient does not match the shape of self.theta'

        rebar_loss_matrix = self.theta * estimated_gradient
        rebar_loss = rebar_loss_matrix.sum()
        return rebar_loss


    def init_hidden(self, batch_size=cfg.batch_size):
        """init RMC memory"""
        memory = self.lstm.initial_state(batch_size)
        memory = self.lstm.repackage_hidden(memory)  # detach memory first
        return memory.cuda() if self.gpu else memory

    @staticmethod
    def add_gumbel(theta, eps=1e-10, gpu=cfg.CUDA):
        """Perturb log(theta) with Gumbel(0, 1) noise; return the perturbed logits and the noise itself."""
        u = torch.zeros(theta.size())
        if gpu:
            u = u.cuda()

        u.uniform_(0, 1)
        # Gumbel-Max trick: log(theta) plus Gumbel(0, 1) noise yields logits whose argmax is a sample from theta.
        gumbel = - torch.log(-torch.log(u + eps) + eps)
        gumbel_t = torch.log(theta + eps) + gumbel
        return gumbel_t, gumbel
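
computeRebarLoss is a surrogate-loss construction: self.theta stays attached to the generator's graph, so backpropagating (theta * estimated_gradient).sum() with a detached estimated_gradient deposits exactly that estimate as dL/dθ and lets autograd carry it back into the network weights. A minimal sketch of the same pattern with a toy one-layer "generator" (hypothetical names, not the repository's API):

import torch
import torch.nn.functional as F

torch.manual_seed(0)
proj = torch.nn.Linear(8, 5)           # toy generator: logits over a 5-token vocabulary
x = torch.randn(3, 8)                  # toy batch of 3 hidden states
theta = F.softmax(proj(x), dim=-1)     # analogue of self.theta, kept in the autograd graph

estimated_gradient = torch.randn_like(theta).detach()   # pretend REBAR estimate of dLoss/dtheta

surrogate = (theta * estimated_gradient).sum()   # analogue of computeRebarLoss
surrogate.backward()                             # pushes estimated_gradient back into proj

print(proj.weight.grad.shape)  # torch.Size([5, 8]) -- the estimate reached the generator weights

Since the derivative of the surrogate with respect to each entry of theta is the corresponding entry of estimated_gradient, the chain rule applies the estimate to the parameters that produced theta.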
Example #6
class CatGAN_G(LSTMGenerator):
    def __init__(self,
                 k_label,
                 mem_slots,
                 num_heads,
                 head_size,
                 embedding_dim,
                 hidden_dim,
                 vocab_size,
                 max_seq_len,
                 padding_idx,
                 gpu=False):
        super(CatGAN_G, self).__init__(embedding_dim, hidden_dim, vocab_size,
                                       max_seq_len, padding_idx, gpu)
        self.name = 'catgan'

        self.k_label = k_label
        self.temperature = nn.Parameter(
            torch.Tensor([1.0]), requires_grad=False)  # init value is 1.0

        # Category matrix
        # self.cat_mat = nn.Parameter(torch.rand(self.k_label, embedding_dim), requires_grad=True)
        self.cat_mat = nn.Parameter(torch.eye(k_label), requires_grad=False)

        self.embeddings = nn.Embedding(vocab_size,
                                       embedding_dim,
                                       padding_idx=padding_idx)
        if cfg.model_type == 'LSTM':
            # LSTM
            self.hidden_dim = hidden_dim
            self.lstm = nn.LSTM(k_label + embedding_dim,
                                self.hidden_dim,
                                batch_first=True)
            self.lstm2out = nn.Linear(self.hidden_dim, vocab_size)
        else:
            # RMC
            self.hidden_dim = mem_slots * num_heads * head_size
            self.lstm = RelationalMemory(mem_slots=mem_slots,
                                         head_size=head_size,
                                         input_size=k_label + embedding_dim,
                                         num_heads=num_heads,
                                         return_all_outputs=True)
            self.lstm2out = nn.Linear(self.hidden_dim, vocab_size)
        self.init_params()

    def init_hidden(self, batch_size=cfg.batch_size):
        if cfg.model_type == 'LSTM':
            h = torch.zeros(1, batch_size, self.hidden_dim)
            c = torch.zeros(1, batch_size, self.hidden_dim)

            if self.gpu:
                return h.cuda(), c.cuda()
            else:
                return h, c
        else:
            """init RMC memory"""
            memory = self.lstm.initial_state(batch_size)
            memory = self.lstm.repackage_hidden(
                memory)  # detach memory first
            return memory.cuda() if self.gpu else memory

    def forward(self, inp, hidden, label=None, need_hidden=False):
        """
        Embeds the input, concatenates the category vector onto each token embedding, and applies the LSTM/RMC
        :param inp: batch_size * seq_len
        :param label: [batch_size], label index of each sample
        :param hidden: RMC memory (or LSTM hidden state)
        :param need_hidden: if True, also return the hidden state (used for sampling)
        """
        assert type(label) == torch.Tensor, 'missing label'
        emb = self.embeddings(inp)  # batch_size * len * embedding_dim

        # cat category vector
        label_onehot = F.one_hot(label,
                                 self.k_label).float()  # batch_size * k_label
        label_onehot_ex = label_onehot.unsqueeze(1).expand(
            -1, inp.size(1), -1)  # batch_size * len * k_label
        label_vec = torch.bmm(label_onehot_ex,
                              self.cat_mat.expand(
                                  inp.size(0), -1,
                                  -1))  # batch_size * len * embed_dim
        emb = torch.cat((emb, label_vec),
                        dim=-1)  # batch_size * len * (k_label + embed_dim)

        out, hidden = self.lstm(
            emb, hidden)  # out: batch_size * seq_len * hidden_dim
        out = out.contiguous().view(
            -1, self.hidden_dim)  # out: (batch_size * len) * hidden_dim
        out = self.lstm2out(out)  # (batch_size * seq_len) * vocab_size
        # out = self.temperature * out  # temperature
        pred = self.softmax(out)

        if need_hidden:
            return pred, hidden
        else:
            return pred

    def step(self, inp, hidden, label=None):
        """
        RelGAN-style step forward with category conditioning
        :param inp: [batch_size], current input tokens
        :param hidden: RMC memory (or LSTM hidden state)
        :param label: [batch_size], label index of each sample
        :return: pred, hidden, next_token
            - pred: batch_size * vocab_size, used for the adversarial training backward pass
            - hidden: next hidden state
            - next_token: [batch_size], next token of each sequence
        """
        assert type(label) == torch.Tensor, 'missing label'
        emb = self.embeddings(inp).unsqueeze(1)

        # cat category vector
        label_onehot = F.one_hot(label,
                                 self.k_label).float()  # batch_size * k_label
        label_onehot_ex = label_onehot.unsqueeze(1).expand(
            -1, 1, -1)  # batch_size * 1 * k_label
        label_vec = torch.bmm(label_onehot_ex,
                              self.cat_mat.expand(
                                  inp.size(0), -1,
                                  -1))  # batch_size * 1 * embed_dim
        emb = torch.cat((emb, label_vec),
                        dim=-1)  # batch_size * 1 * (k_label + embed_dim)

        out, hidden = self.lstm(emb, hidden)
        gumbel_t = self.add_gumbel(self.lstm2out(out.squeeze(1)))
        next_token = torch.argmax(gumbel_t, dim=1).detach()

        pred = F.softmax(gumbel_t * self.temperature,
                         dim=-1)  # batch_size * vocab_size

        return pred, hidden, next_token

    def sample(self,
               num_samples,
               batch_size,
               one_hot=False,
               label_i=None,
               start_letter=cfg.start_letter):
        """
        Sample sequences from the CatGAN generator, conditioned on a category label
        - one_hot: if True, return the Gumbel-softmax predictions instead (used for adversarial training)
        - label_i: label index to condition on
        :return:
            - all_preds: batch_size * seq_len * vocab_size, only returned for a single batch
            - samples: all sampled token sequences
        """
        all_preds = None
        assert type(label_i) == int, 'missing label'
        num_batch = num_samples // batch_size + 1 if num_samples != batch_size else 1
        samples = torch.zeros(num_batch * batch_size, self.max_seq_len).long()
        if one_hot:
            all_preds = torch.zeros(batch_size, self.max_seq_len,
                                    self.vocab_size)
            if self.gpu:
                all_preds = all_preds.cuda()

        for b in range(num_batch):
            hidden = self.init_hidden(batch_size)
            inp = torch.LongTensor([start_letter] * batch_size)
            label_t = torch.LongTensor([label_i] * batch_size)
            if self.gpu:
                inp = inp.cuda()
                label_t = label_t.cuda()

            for i in range(self.max_seq_len):
                pred, hidden, next_token = self.step(inp, hidden, label_t)
                samples[b * batch_size:(b + 1) * batch_size, i] = next_token
                if one_hot:
                    all_preds[:, i] = pred
                inp = next_token
        samples = samples[:num_samples]  # num_samples * seq_len

        if one_hot:
            return all_preds  # batch_size * seq_len * vocab_size
        return samples

    @staticmethod
    def add_gumbel(o_t, eps=1e-10, gpu=cfg.CUDA):
        """Add o_t by a vector sampled from Gumbel(0,1)"""
        u = torch.rand(o_t.size())
        if gpu:
            u = u.cuda()
        g_t = -torch.log(-torch.log(u + eps) + eps)
        gumbel_t = o_t + g_t
        return gumbel_t
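
In both step implementations the Gumbel-perturbed logits are multiplied by self.temperature before the softmax, so the value acts as an inverse temperature: raising it sharpens pred toward the one-hot next_token while keeping it differentiable. A short standalone sketch of that effect:

import torch
import torch.nn.functional as F

torch.manual_seed(0)
gumbel_t = torch.randn(1, 5)   # stand-in for the Gumbel-perturbed logits of one step

for temperature in (1.0, 5.0, 100.0):
    pred = F.softmax(gumbel_t * temperature, dim=-1)
    print(temperature, pred.max().item())   # the largest probability approaches 1 as temperature grows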