Code Example #1
    # init epoch training losses
    batch_reconstruction_losses = 0.0
    batch_discriminator_losses = 0.0
    batch_generator_losses = 0.0

    # determine if GPU training is enabled
    if (torch.backends.cudnn.version() is not None) and USE_CUDA:

        # set all networks / models in GPU mode
        encoder_train.cuda()
        decoder_train.cuda()
        discriminator_train.cuda()

    # set networks in training mode (apply dropout when needed)
    encoder_train.train()
    decoder_train.train()
    discriminator_train.train()

    # start timer
    start_time = datetime.now()

    # iterate over epoch mini batches
    for mini_batch_data in dataloader:

        # increase mini batch counter
        mini_batch_count += 1

        # determine if GPU training is enabled
        if (torch.backends.cudnn.version() is not None) and USE_CUDA:

            # convert mini batch to torch variable
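The first snippet is cut off right after this comment. A minimal sketch of the conversion step it describes, assuming the dataloader yields plain CPU tensors and that the legacy Variable wrapper from older PyTorch is intended; mini_batch_torch is a hypothetical name, not taken from the original project:

            # hypothetical continuation of the GPU branch above, not from the original project:
            # move the mini batch to the GPU and wrap it for autograd
            mini_batch_torch = torch.autograd.Variable(mini_batch_data.cuda())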
Code Example #2
File: main.py Project: lavoiems/Playground
                            nn.LeakyReLU(0.1, True), 4).cuda()
    gan.apply(weights_init)
    discriminator.apply(weights_init)
    weight_clip = weight_cliping(0.05)

    generator_optimizer = optim.Adam(gan.parameters(),
                                     lr=args.lr,
                                     betas=(args.beta1, args.beta2))
    discriminator_optimizer = optim.Adam(discriminator.parameters(),
                                         lr=args.lr,
                                         betas=(args.beta1, args.beta2))

    t = 0
    for i in range(201):
        print('Epoch: %s' % i)
        gan.train()
        discriminator.train()
        start = time.time()
        for data in train_loader:
            t += 1
            discriminator_optimizer.zero_grad()
            loss = get_disc_loss(data, gan, discriminator, data[0].shape[0],
                                 args.z_size, args.use_penalty)
            discriminator_optimizer.step()
            if args.use_weight_clip:
                discriminator.apply(weight_clip)
            if t == args.n_dis:
                t = 0
                generator_optimizer.zero_grad()
                loss = get_gen_loss(gan, discriminator, data[0].shape[0],
                                    args.z_size)
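The second snippet ends before the generator step, and get_disc_loss / get_gen_loss are not shown. A minimal sketch of what those helpers might compute, assuming a WGAN-style critic (the weight clipping and the n_dis update ratio point that way) and assuming each helper calls backward() itself, since the loop steps the optimizers immediately afterwards; the bodies are illustrative, not the project's implementation:

import torch

def get_disc_loss(data, gan, discriminator, batch_size, z_size, use_penalty):
    # critic loss: raise the score of real samples, lower the score of generated ones
    real = data[0]
    z = torch.randn(batch_size, z_size, device=real.device)
    fake = gan(z).detach()  # no generator gradients during the critic update
    loss = discriminator(fake).mean() - discriminator(real).mean()
    # a gradient penalty term would be added here when use_penalty is set
    loss.backward()
    return loss

def get_gen_loss(gan, discriminator, batch_size, z_size):
    # generator loss: make the critic score generated samples highly
    device = next(gan.parameters()).device
    z = torch.randn(batch_size, z_size, device=device)
    loss = -discriminator(gan(z)).mean()
    loss.backward()
    return loss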
Code Example #3
class Mem2SeqRunner(ExperimentRunnerBase):
    def __init__(self, args):
        super(Mem2SeqRunner, self).__init__(args)

        # Model parameters
        self.gru_size = 128
        self.emb_size = 128
        #TODO: Try hops 4 with task 3
        self.hops = 3
        self.dropout = 0.2

        self.encoder = Encoder(self.hops, self.nwords, self.gru_size)
        self.decoder = Decoder(self.emb_size, self.hops, self.gru_size,
                               self.nwords)

        self.optim_enc = torch.optim.Adam(self.encoder.parameters(), lr=0.001)
        self.optim_dec = torch.optim.Adam(self.decoder.parameters(), lr=0.001)
        if self.loss_weighting:
            self.optim_loss_weights = torch.optim.Adam([self.loss_weights],
                                                       lr=0.0001)
        self.scheduler = lr_scheduler.ReduceLROnPlateau(self.optim_dec,
                                                        mode='max',
                                                        factor=0.5,
                                                        patience=1,
                                                        min_lr=0.0001,
                                                        verbose=True)

        if self.use_cuda:
            self.cross_entropy = self.cross_entropy.cuda()
            self.encoder = self.encoder.cuda()
            self.decoder = self.decoder.cuda()
            if self.loss_weighting:
                self.loss_weights = self.loss_weights.cuda()

    def train_batch_wrapper(self, batch, new_epoch, clip_grads):
        context = batch[0].transpose(0, 1)
        responses = batch[1].transpose(0, 1)
        index = batch[2].transpose(0, 1)
        sentinel = batch[3].transpose(0, 1)
        context_lengths = batch[4]
        target_lengths = batch[5]
        return self.train_batch(context, responses, index, sentinel, new_epoch,
                                context_lengths, target_lengths, clip_grads)

    def train_batch(self, context, responses, index, sentinel, new_epoch,
                    context_lengths, target_lengths, clip_grads):

        # (TODO): remove transpose
        if new_epoch:  # (TODO): Change this part
            self.loss = 0
            self.ploss = 0
            self.vloss = 0
            self.n = 1

        context = context.type(self.TYPE)
        responses = responses.type(self.TYPE)
        index = index.type(self.TYPE)
        sentinel = sentinel.type(self.TYPE)

        self.optim_enc.zero_grad()
        self.optim_dec.zero_grad()
        if self.loss_weighting:
            self.optim_loss_weights.zero_grad()

        h = self.encoder(context.transpose(0, 1))
        self.decoder.load_memory(context.transpose(0, 1))
        y = torch.from_numpy(np.array([2] * context.size(1),
                                      dtype=int)).type(self.TYPE)
        y_len = 0

        h = h.unsqueeze(0)
        output_vocab = torch.zeros(max(target_lengths), context.size(1),
                                   self.nwords)
        output_ptr = torch.zeros(max(target_lengths), context.size(1),
                                 context.size(0))
        if self.use_cuda:
            output_vocab = output_vocab.cuda()
            output_ptr = output_ptr.cuda()
        while y_len < responses.size(0):  # TODO: Add EOS condition
            p_ptr, p_vocab, h = self.decoder(context, y, h)
            output_vocab[y_len] = p_vocab
            output_ptr[y_len] = p_ptr
            #TODO: Add teacher forcing ratio
            y = responses[y_len].type(self.TYPE)
            y_len += 1

        # print(loss)
        mask_v = torch.ones(output_vocab.size())
        mask_p = torch.ones(output_ptr.size())
        if self.use_cuda:
            mask_p = mask_p.cuda()
            mask_v = mask_v.cuda()
        for i in range(responses.size(1)):
            mask_v[target_lengths[i]:, i, :] = 0
            mask_p[target_lengths[i]:, i, :] = 0

        loss_v = self.cross_entropy(
            output_vocab.contiguous().view(-1, self.nwords),
            responses.contiguous().view(-1))

        loss_ptr = self.cross_entropy(
            output_ptr.contiguous().view(-1, context.size(0)),
            index.contiguous().view(-1))
        if self.loss_weighting:
            loss = (loss_ptr / (2 * self.loss_weights[0] * self.loss_weights[0])
                    + loss_v / (2 * self.loss_weights[1] * self.loss_weights[1])
                    + torch.log(self.loss_weights[0] * self.loss_weights[1]))
            loss_ptr = loss_ptr / (2 * self.loss_weights[0] *
                                   self.loss_weights[0])
            loss_v = loss_v / (2 * self.loss_weights[1] * self.loss_weights[1])
        else:
            loss = loss_ptr + loss_v
        loss.backward()
        ec = torch.nn.utils.clip_grad_norm_(self.encoder.parameters(), 10.0)
        dc = torch.nn.utils.clip_grad_norm_(self.decoder.parameters(), 10.0)
        self.optim_enc.step()
        self.optim_dec.step()
        if self.loss_weighting:
            self.optim_loss_weights.step()

        self.loss += loss.item()
        self.vloss += loss_v.item()
        self.ploss += loss_ptr.item()

        return loss.item(), loss_v.item(), loss_ptr.item()

    def evaluate_batch(self,
                       batch_size,
                       input_batches,
                       input_lengths,
                       target_batches,
                       target_lengths,
                       target_index,
                       target_gate,
                       src_plain,
                       profile_memory=None):

        # Set to not-training mode to disable dropout
        self.encoder.train(False)
        self.decoder.train(False)
        # Run words through encoder
        decoder_hidden = self.encoder(input_batches.transpose(0,
                                                              1)).unsqueeze(0)
        self.decoder.load_memory(input_batches.transpose(0, 1))

        # Prepare input and output variables
        decoder_input = Variable(torch.LongTensor([2] * batch_size))

        decoded_words = []
        all_decoder_outputs_vocab = Variable(
            torch.zeros(max(target_lengths), batch_size, self.nwords))
        all_decoder_outputs_ptr = Variable(
            torch.zeros(max(target_lengths), batch_size,
                        input_batches.size(0)))
        # all_decoder_outputs_gate = Variable(torch.zeros(self.max_r, batch_size))
        # Move new Variables to CUDA

        if self.use_cuda:
            all_decoder_outputs_vocab = all_decoder_outputs_vocab.cuda()
            all_decoder_outputs_ptr = all_decoder_outputs_ptr.cuda()
            # all_decoder_outputs_gate = all_decoder_outputs_gate.cuda()
            decoder_input = decoder_input.cuda()

        p = []
        for elm in src_plain:
            elm_temp = [word_triple[0] for word_triple in elm]
            p.append(elm_temp)

        self.from_whichs = []
        acc_gate, acc_ptr, acc_vac = 0.0, 0.0, 0.0
        # Run through decoder one time step at a time
        for t in range(max(target_lengths)):
            decoder_ptr, decoder_vacab, decoder_hidden = self.decoder(
                input_batches, decoder_input, decoder_hidden)
            all_decoder_outputs_vocab[t] = decoder_vacab
            topv, topvi = decoder_vacab.data.topk(1)
            all_decoder_outputs_ptr[t] = decoder_ptr
            topp, toppi = decoder_ptr.data.topk(1)
            top_ptr_i = torch.gather(input_batches[:, :, 0], 0,
                                     Variable(toppi.view(1,
                                                         -1))).transpose(0, 1)
            next_in = [
                top_ptr_i[i].item() if
                (toppi[i].item() < input_lengths[i] - 1) else topvi[i].item()
                for i in range(batch_size)
            ]
            # if next_in in self.kb_entry.keys():
            #     ptr_distr.append([next_in, decoder_vacab.data])

            decoder_input = Variable(
                torch.LongTensor(next_in))  # Chosen word is next input
            if self.use_cuda: decoder_input = decoder_input.cuda()

            temp = []
            from_which = []
            for i in range(batch_size):
                if (toppi[i].item() < len(p[i]) - 1):
                    temp.append(p[i][toppi[i].item()])
                    from_which.append('p')
                else:
                    if target_index[t][i] != toppi[i].item():
                        self.incorrect_sentinel += 1
                    ind = topvi[i].item()
                    if ind == 3:
                        temp.append('<eos>')
                    else:
                        temp.append(self.i2w[ind])
                    from_which.append('v')
            decoded_words.append(temp)
            self.from_whichs.append(from_which)
        self.from_whichs = np.array(self.from_whichs)

        loss_v = self.cross_entropy(
            all_decoder_outputs_vocab.contiguous().view(-1, self.nwords),
            target_batches.contiguous().view(-1))
        loss_ptr = self.cross_entropy(
            all_decoder_outputs_ptr.contiguous().view(-1,
                                                      input_batches.size(0)),
            target_index.contiguous().view(-1))

        if self.loss_weighting:
            loss = (loss_ptr / (2 * self.loss_weights[0] * self.loss_weights[0])
                    + loss_v / (2 * self.loss_weights[1] * self.loss_weights[1])
                    + torch.log(self.loss_weights[0] * self.loss_weights[1]))
        else:
            loss = loss_ptr + loss_v

        self.loss += loss.item()
        self.vloss += loss_v.item()
        self.ploss += loss_ptr.item()
        self.n += 1

        # Set back to training mode
        self.encoder.train(True)
        self.decoder.train(True)
        return decoded_words, self.from_whichs  # , acc_ptr, acc_vac

    def save_models(self, path):
        torch.save(self.encoder.state_dict(),
                   os.path.join(path, 'encoder.pth'))
        torch.save(self.decoder.state_dict(),
                   os.path.join(path, 'decoder.pth'))

    def load_models(self, path: str = '.'):
        self.encoder.load_state_dict(
            torch.load(os.path.join(path, 'encoder.pth')))
        self.decoder.load_state_dict(
            torch.load(os.path.join(path, 'decoder.pth')))
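Both train_batch and evaluate_batch combine the pointer and vocabulary losses with learned weights in the form loss_ptr / (2*w0^2) + loss_v / (2*w1^2) + log(w0*w1). A standalone sketch of that weighting, with an assumed initialization (in the project the weights come from ExperimentRunnerBase, which is not shown here):

import torch

# assumed initialization: one positive weight per loss term
loss_weights = torch.nn.Parameter(torch.ones(2))

def combine_losses(loss_ptr, loss_v, w=loss_weights):
    # each loss is scaled by 1 / (2 * w_i^2); the log term penalizes large weights,
    # so a weight cannot shrink its loss to zero for free
    return (loss_ptr / (2 * w[0] * w[0])
            + loss_v / (2 * w[1] * w[1])
            + torch.log(w[0] * w[1]))

# the weights are optimized jointly with the model, as optim_loss_weights does above
loss = combine_losses(torch.tensor(1.3), torch.tensor(0.7))
loss.backward()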
Code Example #4
        real_center = real_center.to(
            device)  # real_center.shape = [24, 1, 512, 3]
        input_cropped1 = input_cropped1.to(
            device)  # input_cropped1.shape = [24, 1, 2048, 3]
        label = label.to(device)  # real label construction done

        # obtain data for the two channels
        real_center = torch.squeeze(real_center, 1)  # [24, 512, 3]
        real_center_key1 = utils.index_points(real_center,
                                              real_center_key1_idx)

        input_cropped1 = torch.squeeze(input_cropped1, 1)

        input_cropped = [input_cropped1, input_cropped2]  # check that the inputs contain 2048 and 512 points
        gen_net = gen_net.train()
        dis_net = dis_net.train()

        # update discriminator
        dis_net.zero_grad()
        real_center = torch.unsqueeze(real_center, 1)
        print('real center shape', real_center.shape)
        real_out = dis_net(real_center)
        #print('real label shape', label.shape)
        dis_err_real = criterion(real_out, label)
        dis_err_real.backward()

        fake_center1, fake_fine = gen_net(input_cropped1)

        # generator reconstruction loss (AE loss)
        errG_l2 = criterion_PointLoss(torch.squeeze(fake_fine, 1),
                                      torch.squeeze(real_center, 1)) \
            + lam1 * criterion_PointLoss(fake_center1, real_center_key1)
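The fourth snippet is cut off after the real pass of the discriminator and the generator forward. A sketch of how a standard two-pass GAN update would typically continue from this point; fake_label, dis_optimizer and gen_optimizer are assumed names, not taken from the original file:

        # assumed continuation, not from the original file
        fake_center = torch.unsqueeze(fake_fine, 1)      # match the [B, 1, N, 3] layout fed to dis_net
        fake_out = dis_net(fake_center.detach())         # detach so no generator gradients flow here
        dis_err_fake = criterion(fake_out, fake_label)   # fake_label: assumed all-zero labels
        dis_err_fake.backward()
        dis_optimizer.step()

        # update the generator: adversarial term plus the reconstruction loss errG_l2 above
        gen_net.zero_grad()
        gen_out = dis_net(fake_center)
        errG = criterion(gen_out, label) + errG_l2
        errG.backward()
        gen_optimizer.step()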