Ejemplo n.º 1
0
    def train_one_epoch(self):
        """Run one optimisation pass over ``self.train_data``.

        For every batch produced by ``utils.get_batch`` the encoder and the
        decoder are run, the loss is back-propagated, gradients of both
        modules are clipped to a norm of 50.0 and both optimizers are
        stepped.

        Returns:
            The mean of the per-batch loss values (``np.mean`` of an empty
            list, i.e. NaN, when no batch was produced).
        """
        # Only the long-tensor constructor is used in this method.
        _, LongTensor, _ = utils.get_pytorch_tensors()
        s_w2i = self.s_w2i
        t_w2i = self.t_w2i
        losses = []
        for batch in utils.get_batch(self.args.batch_size, self.train_data):
            srcs, tgts, s_len, _ = utils.pad_to_batch(batch, s_w2i, t_w2i)
            # One '<s>' id per row of ``tgts``; built as (1, batch) and
            # transposed to (batch, 1).
            start_decode = Variable(
                LongTensor([[t_w2i['<s>']] * tgts.size(0)])).transpose(0, 1)
            self.encoder.zero_grad()
            self.decoder.zero_grad()
            output, hidden_c = self.encoder(srcs, s_len)

            # NOTE(review): the trailing ``True`` is presumably a
            # training-mode flag of the decoder - confirm against its
            # signature.
            preds = self.decoder(start_decode, hidden_c, tgts.size(1), output,
                                 True)

            loss = self.loss_function(preds, tgts.view(-1))
            # ``item()`` works for 0-dim loss tensors, unlike
            # ``loss.data.tolist()[0]``.
            losses.append(loss.item())
            loss.backward()
            torch.nn.utils.clip_grad_norm_(self.encoder.parameters(), 50.0)
            torch.nn.utils.clip_grad_norm_(self.decoder.parameters(), 50.0)
            self.enc_optim.step()
            self.dec_optim.step()
        return np.mean(losses)
Ejemplo n.º 2
0
    def train_one_epoch_autoencoder(self, obj):
        """Train one epoch of denoising auto-encoding for language ``obj``.

        Every batch of ``self.train_dataloader`` is turned into a
        (noised sentence -> clean sentence) pair in language ``obj``: the
        noised copy (``sent_noise.run``) is stored under ``'src'`` and the
        clean copy under ``'tgt'``. Encoder, decoder and the embedder of
        ``obj`` are then updated on the reconstruction loss.

        Args:
            obj: Language key, ``'src'`` or ``'tgt'``, used to index
                ``self.converters``, ``self.embedders`` and ``self.optims``.

        Returns:
            The mean of the per-batch loss values.
        """
        print('objective: %s' % obj)
        w2i = self.converters[obj]['w2i']
        embedder = self.embedders[obj]
        embedder_optim = self.optims[obj]
        losses = []
        for batch in tqdm(self.train_dataloader):

            # Add noise. The noised sentences must always be the encoder
            # input ('src') and the clean ones the decoder target ('tgt'),
            # whichever language is being auto-encoded.
            # NOTE(review): the 'src' = input / 'tgt' = target convention of
            # utils.prepare_batch / utils.pad_to_batch is taken from how the
            # sibling methods build their batches - confirm.
            clean_sents = copy.deepcopy(batch[obj])
            batch['src'] = [sent_noise.run(s) for s in clean_sents]
            batch['tgt'] = clean_sents

            # convert string to ids
            batch = utils.prepare_batch(batch, w2i, w2i)

            inputs, targets, input_lengths, _ =\
                utils.pad_to_batch(batch, w2i, w2i)

            # One '<s>' id per sentence, transposed to (batch, 1).
            start_decode =\
                Variable(LT([[w2i['<s>']] * inputs.size(0)])).transpose(0, 1)
            self.encoder.zero_grad()
            self.decoder.zero_grad()
            embedder.zero_grad()

            if self.args.use_cuda:
                inputs = inputs.cuda()
                targets = targets.cuda()
                start_decode = start_decode.cuda()

            output, hidden_c = self.encoder(embedder,
                                            inputs,
                                            input_lengths)

            preds = self.decoder(embedder,
                                 start_decode,
                                 hidden_c,
                                 targets.size(1),
                                 output,
                                 None,
                                 True)
            loss = self.loss_func(preds, targets.view(-1))
            # ``item()`` works for 0-dim loss tensors, unlike
            # ``loss.data[0]``.
            losses.append(loss.item())
            loss.backward()
            nn.utils.clip_grad_norm_(self.encoder.parameters(), 50.0)
            nn.utils.clip_grad_norm_(self.decoder.parameters(), 50.0)
            self.enc_optim.step()
            self.dec_optim.step()
            embedder_optim.step()
        return np.mean(losses)
Ejemplo n.º 3
0
    def train_one_epoch_translator(self, _from='src', _to='tgt'):
        """Train one supervised translation epoch in direction _from -> _to.

        Args:
            _from: Language key (``'src'`` or ``'tgt'``) of the encoder input.
            _to: Language key (``'src'`` or ``'tgt'``) of the decoder output.

        Returns:
            The mean of the per-batch loss values (also printed).
        """
        print('%s -> %s' % (_from, _to))
        sw2i = self.converters[_from]['w2i']
        tw2i = self.converters[_to]['w2i']
        src_embedder = self.embedders[_from]
        tgt_embedder = self.embedders[_to]
        # Both language optimizers are stepped whatever the direction is.
        src_embedder_optim = self.optims['src']
        tgt_embedder_optim = self.optims['tgt']
        losses = []
        for batch in tqdm(self.train_dataloader):
            # Re-key the batch so that 'src' holds the input-language
            # sentences and 'tgt' the output-language ones; this is a no-op
            # for the default direction.
            # NOTE(review): assumes dataloader batches are keyed by
            # 'src'/'tgt' and that the utils helpers read 'src' with the
            # first vocabulary - confirm.
            batch['src'], batch['tgt'] = batch[_from], batch[_to]
            batch = utils.prepare_batch(batch, sw2i, tw2i)
            inputs, targets, input_lengths, _ =\
                utils.pad_to_batch(batch, sw2i, tw2i)

            # One '<s>' id per sentence, transposed to (batch, 1).
            start_decode =\
                Variable(LT([[tw2i['<s>']] * targets.size(0)])).transpose(0, 1)
            self.encoder.zero_grad()
            self.decoder.zero_grad()
            src_embedder.zero_grad()
            tgt_embedder.zero_grad()

            if self.args.use_cuda:
                inputs = inputs.cuda()
                targets = targets.cuda()
                start_decode = start_decode.cuda()

            output, hidden_c = self.encoder(src_embedder,
                                            inputs,
                                            input_lengths)

            preds = self.decoder(tgt_embedder,
                                 start_decode,
                                 hidden_c,
                                 targets.size(1),
                                 output,
                                 None,
                                 True)
            loss = self.loss_func(preds, targets.view(-1))
            # ``item()`` works for 0-dim loss tensors, unlike
            # ``loss.data[0]``.
            losses.append(loss.item())
            loss.backward()
            nn.utils.clip_grad_norm_(self.encoder.parameters(), 50.0)
            nn.utils.clip_grad_norm_(self.decoder.parameters(), 50.0)
            self.enc_optim.step()
            self.dec_optim.step()
            src_embedder_optim.step()
            tgt_embedder_optim.step()
        mean_loss = np.mean(losses)
        print(mean_loss)
        return mean_loss
Ejemplo n.º 4
0
    def translate(self, sents, input_lang):
        """Translate ``sents`` into the other language with the prev_* models.

        The sentences are encoded with ``self.prev_encoder`` and decoded with
        ``self.prev_decoder`` for a fixed 50 steps; at every step the
        highest-scoring vocabulary id is kept.

        Args:
            sents: Sentences written in ``input_lang``.
            input_lang: ``'src'`` or ``'tgt'``; the output language is the
                other one.

        Returns:
            One space-joined string of 50 tokens per input sentence; ids
            missing from the output vocabulary are rendered as ``'<UNK>'``.
        """
        output_lang = 'tgt' if input_lang != 'tgt' else 'src'
        in_converter = self.converters[input_lang]
        out_converter = self.converters[output_lang]
        src_vocab = in_converter['w2i']
        tgt_vocab = out_converter['w2i']
        tgt_words = out_converter['i2w']

        in_embedder = self.prev_embedders[input_lang]
        out_embedder = self.prev_embedders[output_lang]

        # The same sentences are placed under both keys; only the batch
        # size of the resulting targets is used below.
        batch = utils.prepare_batch({'src': sents, 'tgt': sents},
                                    src_vocab, tgt_vocab)
        inputs, targets, input_lengths, target_lengths =\
            utils.pad_to_batch(batch, src_vocab, tgt_vocab)
        start_ids = LT([[tgt_vocab['<s>']] * targets.size(0)])
        start_decode = Variable(start_ids).transpose(0, 1)
        if self.args.use_cuda:
            inputs = inputs.cuda()
            targets = targets.cuda()
            start_decode = start_decode.cuda()

        output, hidden_c = self.prev_encoder(in_embedder, inputs,
                                             input_lengths)
        max_length = 50
        preds = self.prev_decoder(out_embedder, start_decode, hidden_c,
                                  max_length, output, None, True)
        # Reshape to (batch, step, scores) and keep the arg-max id per step.
        scores = preds.view(inputs.size(0), max_length, -1)
        best_ids = torch.max(scores, 2)[1].data

        return [' '.join([tgt_words.get(token_id, '<UNK>')
                          for token_id in best_ids[row].tolist()])
                for row in range(len(sents))]
Ejemplo n.º 5
0
    def calc_test_loss(self, log_dict):
        """Compute the mean loss over ``self.test_dataloader``.

        Writes ``'test_loss'`` and ``'sample_translation'`` into
        ``log_dict``. The sample shows the first sentence of the last test
        batch as source, reference and arg-max prediction.

        Args:
            log_dict: Dictionary that receives the results; modified in
                place.

        Returns:
            The same ``log_dict``. When the dataloader yields no batch,
            ``'test_loss'`` is NaN and ``'sample_translation'`` stays empty.
        """
        sw2i = self.sw2i
        tw2i = self.tw2i
        losses = []
        # Tensors of the most recent batch, kept for the sample translation.
        inputs = targets = preds = None
        # Evaluation only: no backward pass is run, so no graph is needed.
        with torch.no_grad():
            for batch in self.test_dataloader:
                batch = utils.prepare_batch(batch, sw2i, tw2i)
                inputs, targets, input_lengths, _ =\
                    utils.pad_to_batch(batch, sw2i, tw2i)

                # One '<s>' id per sentence, transposed to (batch, 1).
                start_decode =\
                    Variable(LT([[tw2i['<s>']] * targets.size(0)]),
                             requires_grad=False)\
                    .transpose(0, 1)

                if self.args.use_cuda:
                    inputs = inputs.cuda()
                    targets = targets.cuda()
                    start_decode = start_decode.cuda()

                output, hidden_c = self.encoder(self.src_embedder, inputs,
                                                input_lengths)

                preds = self.decoder(self.tgt_embedder, start_decode,
                                     hidden_c, targets.size(1), output,
                                     None, True)
                loss = self.loss_func(preds, targets.view(-1))
                # ``item()`` works for 0-dim loss tensors, unlike
                # ``loss.data[0]``.
                losses.append(loss.item())

        log_dict['test_loss'] = np.mean(losses)

        log_dict['sample_translation'] = {}
        if preds is None:
            # Empty dataloader: there is no batch to take a sample from.
            return log_dict

        log_dict['sample_translation']['src'] =\
            ' '.join([self.si2w[p] for p in inputs.data[0].tolist()])

        log_dict['sample_translation']['tgt'] =\
            ' '.join([self.ti2w[p] for p in targets.data[0].tolist()])

        # Reshape to (batch, step, scores) and keep the arg-max id per step.
        preds = preds.view(inputs.size(0), targets.size(1), -1)
        preds_max = torch.max(preds, 2)[1]
        log_dict['sample_translation']['prediction'] =\
            ' '.join([self.ti2w[p] for p in preds_max.data[0].tolist()])

        return log_dict
Ejemplo n.º 6
0
    def train_one_epoch(self, log_dict):
        """Train src -> tgt translation for one epoch.

        Encoder, decoder and both embedders are updated on every batch of
        ``self.train_dataloader``; encoder and decoder gradients are clipped
        to a norm of 50.0.

        Args:
            log_dict: Dictionary that receives the mean loss under
                ``'train_loss'``; modified in place.

        Returns:
            The same ``log_dict``.
        """
        sw2i = self.sw2i
        tw2i = self.tw2i
        losses = []
        for batch in tqdm(self.train_dataloader):
            batch = utils.prepare_batch(batch, sw2i, tw2i)
            inputs, targets, input_lengths, _ =\
                utils.pad_to_batch(batch, sw2i, tw2i)

            # One '<s>' id per sentence, transposed to (batch, 1).
            start_decode =\
                Variable(LT([[tw2i['<s>']] * targets.size(0)])).transpose(0, 1)
            self.encoder.zero_grad()
            self.decoder.zero_grad()
            self.src_embedder.zero_grad()
            self.tgt_embedder.zero_grad()

            if self.args.use_cuda:
                inputs = inputs.cuda()
                targets = targets.cuda()
                start_decode = start_decode.cuda()

            output, hidden_c = self.encoder(self.src_embedder,
                                            inputs,
                                            input_lengths)

            preds = self.decoder(self.tgt_embedder,
                                 start_decode,
                                 hidden_c,
                                 targets.size(1),
                                 output,
                                 None,
                                 True)
            loss = self.loss_func(preds, targets.view(-1))
            # ``item()`` works for 0-dim loss tensors, unlike
            # ``loss.data[0]``.
            losses.append(loss.item())
            loss.backward()
            nn.utils.clip_grad_norm_(self.encoder.parameters(), 50.0)
            nn.utils.clip_grad_norm_(self.decoder.parameters(), 50.0)
            self.enc_optim.step()
            self.dec_optim.step()
            self.src_embedder_optim.step()
            self.tgt_embedder_optim.step()
        log_dict['train_loss'] = np.mean(losses)
        return log_dict
Ejemplo n.º 7
0
    def train_one_epoch_adversarial(self):
        """Run 50 discriminator updates followed by 50 generator updates.

        The discriminator is trained to tell encoder states of src
        sentences (label 0.9) from those of tgt sentences (label 0.1). The
        encoder and both embedders are then trained with the labels flipped.

        Returns:
            Tuple ``(mean discriminator loss, mean generator loss)``.
        """
        dataset = self.train_dataloader.dataset
        # Each discriminator batch holds this many src plus as many tgt
        # sentences.
        batch_size = self.args.batch_size // 2
        disc_losses = []
        gen_losses = []

        # Train Discriminator
        self.encoder.eval()
        self.src_embedder.eval()
        self.tgt_embedder.eval()
        self.discriminator.train()
        for _ in range(50):
            disc_input, y = self._adversarial_batch(dataset, batch_size)

            self.discriminator.zero_grad()
            self.disc_optim.zero_grad()
            # Only the discriminator is updated in this phase, so the
            # backward pass is cut off before the eval-mode encoder.
            preds = self.discriminator(disc_input.detach())
            loss = self.disc_loss_func(preds, y)
            loss.backward()
            disc_losses.append(loss.item())
            self.disc_optim.step()

        # Train Generator (Encoder)
        self.encoder.train()
        self.src_embedder.train()
        self.tgt_embedder.train()
        self.discriminator.eval()
        for _ in range(50):
            disc_input, y = self._adversarial_batch(dataset, batch_size)

            self.encoder.zero_grad()
            self.src_embedder.zero_grad()
            self.tgt_embedder.zero_grad()
            preds = self.discriminator(disc_input)
            # Flipped labels: the encoder is rewarded for fooling the
            # discriminator.
            loss = self.disc_loss_func(preds, 1 - y)
            gen_losses.append(loss.item())
            loss.backward()
            # Apply the encoder update; calling zero_grad() here would
            # discard the gradients that were just computed.
            self.enc_optim.step()
            self.src_embedder_optim.step()
            self.tgt_embedder_optim.step()
        return np.mean(disc_losses), np.mean(gen_losses)

    def _adversarial_batch(self, dataset, batch_size):
        """Encode random src and tgt sentences for the discriminator.

        Returns:
            Tuple ``(disc_input, y)``: the flattened encoder states of
            ``batch_size`` src sentences followed by those of ``batch_size``
            tgt sentences, and the matching smoothed labels (0.9 for the src
            rows, 0.1 for the tgt rows).
        """
        # src and tgt sentences are drawn independently of each other.
        indxs = np.random.permutation(len(dataset))[:batch_size]
        srcs = np.array(dataset.src)[indxs]
        indxs = np.random.permutation(len(dataset))[:batch_size]
        tgts = np.array(dataset.tgt)[indxs]

        encoded = []
        for sents, w2i, embedder in ((srcs, self.sw2i, self.src_embedder),
                                     (tgts, self.tw2i, self.tgt_embedder)):
            # Monolingual batch: the same sentences fill both keys and only
            # the input side is used.
            batch = {'src': sents, 'tgt': sents}
            batch = utils.prepare_batch(batch, w2i, w2i)
            inputs, _, input_lengths, _ = utils.pad_to_batch(batch, w2i, w2i)

            if self.args.use_cuda:
                inputs = inputs.cuda()

            _, state = self.encoder(embedder, inputs, input_lengths)
            encoded.append(state.contiguous().view(batch_size, -1))

        # Prepare input for discriminator
        disc_input = torch.cat(encoded)

        y = torch.FloatTensor(batch_size * 2).zero_()
        y[:batch_size] = 1 - 0.1
        y[batch_size:] = 0.1
        y = Variable(y)

        if self.args.use_cuda:
            y = y.cuda()
        return disc_input, y
Ejemplo n.º 8
0
    def train_one_epoch_cross_domain(self, obj, first_iter=False):
        """Train one epoch of the cross-domain (back-translation) objective.

        Every batch of ``obj`` sentences is translated into the other
        language and noised with ``sent_noise.run``. The model is then
        trained to reconstruct the original ``obj`` sentences from that
        noised translation.

        Args:
            obj: Language key (``'src'`` or ``'tgt'``) of the sentences to
                reconstruct.
            first_iter: When true, translations come from
                ``self.bi_dict.translate`` instead of ``self.translate``.

        Returns:
            The mean of the per-batch loss values.
        """
        non_obj = 'src' if obj == 'tgt' else 'tgt'
        print('Calculating cross domain loss %s to %s...' % (obj, non_obj))
        obj_w2i = self.converters[obj]['w2i']
        non_obj_w2i = self.converters[non_obj]['w2i']
        obj_embedder = self.embedders[obj]
        obj_embedder_optim = self.optims[obj]
        non_obj_embedder = self.embedders[non_obj]
        non_obj_embedder_optim = self.optims[non_obj]
        losses = []

        for batch in tqdm(self.train_dataloader):

            # translate obj to non_obj with previous iter model
            if first_iter:
                src_to_tgt = obj == 'src'
                naive_y =\
                    [' '.join(self.bi_dict.translate(sent.split(),
                                                     src_to_tgt=src_to_tgt))
                     for sent in batch[obj]]
            else:
                naive_y = self.translate(batch[obj], obj)

            noised_y = [sent_noise.run(sent) for sent in naive_y]

            # The original obj sentences are the target and the noised
            # translation is the input. 'tgt' is assigned first because
            # batch[obj] may itself be batch['src'].
            batch['tgt'] = batch[obj]
            batch['src'] = noised_y

            # convert string to ids
            batch = utils.prepare_batch(batch, non_obj_w2i, obj_w2i)

            inputs, targets, input_lengths, _ =\
                utils.pad_to_batch(batch, non_obj_w2i, obj_w2i)

            # The decoder emits obj-language tokens, so the start symbol is
            # looked up in the obj vocabulary.
            start_decode =\
                Variable(LT([[obj_w2i['<s>']] * inputs.size(0)]))\
                .transpose(0, 1)
            self.encoder.zero_grad()
            self.decoder.zero_grad()
            obj_embedder.zero_grad()
            non_obj_embedder.zero_grad()

            if self.args.use_cuda:
                inputs = inputs.cuda()
                targets = targets.cuda()
                start_decode = start_decode.cuda()

            output, hidden_c = self.encoder(non_obj_embedder,
                                            inputs,
                                            input_lengths)

            preds = self.decoder(obj_embedder,
                                 start_decode,
                                 hidden_c,
                                 targets.size(1),
                                 output,
                                 None,
                                 True)

            loss = self.loss_func(preds, targets.view(-1))
            # ``item()`` works for 0-dim loss tensors, unlike
            # ``loss.data[0]``.
            losses.append(loss.item())
            loss.backward()
            nn.utils.clip_grad_norm_(self.encoder.parameters(), 50.0)
            nn.utils.clip_grad_norm_(self.decoder.parameters(), 50.0)
            self.enc_optim.step()
            self.dec_optim.step()
            obj_embedder_optim.step()
            non_obj_embedder_optim.step()

        return np.mean(losses)