Example #1
def load_checkpoint(model: torch.nn.Module,
                    optimizer: torch.optim.Adam = torch.optim.Adam,
                    file: str = None) -> int:
    r""" Quick loading model functions

    Args:
        model (nn.Module): Neural network model.
        optimizer (torch.optim): Model optimizer. (Default: torch.optim.Adam)
        file (str): Model file.

    Returns:
        The epoch to resume training from.
    """
    if os.path.isfile(file):
        logger.info(f"[*] Loading checkpoint `{file}`.")
        checkpoint = torch.load(file)
        epoch = checkpoint["epoch"]
        model.load_state_dict(checkpoint["state_dict"])
        optimizer.load_state_dict(checkpoint["optimizer"])
        logger.info(
            f"[*] Loaded checkpoint `{file}` (epoch {checkpoint['epoch']})")
    else:
        logger.info(f"[!] no checkpoint found at '{file}'")
        epoch = 0

    return epoch
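
A checkpoint compatible with this loader is simply a dict with the keys "epoch", "state_dict" and "optimizer". As a hedged usage sketch (the model, optimizer and file name below are placeholders, not from the original source):

import torch

# Hypothetical model/optimizer pair; any nn.Module and matching optimizer works.
model = torch.nn.Linear(10, 2)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# Save side: the dict layout mirrors the keys read by load_checkpoint.
torch.save({
    "epoch": 5,
    "state_dict": model.state_dict(),
    "optimizer": optimizer.state_dict(),
}, "checkpoint.pth")

# Resume side: returns the epoch to continue from (0 if the file is missing).
start_epoch = load_checkpoint(model, optimizer, file="checkpoint.pth")
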
Example #2
def train_loop(configs: dict, model: GTransformer, opt: torch.optim.Adam,
               train: Dataset, test: Dataset,
               text_encoder: WhitespaceEncoder) -> GTransformer:
    """
    Main training loop.

    :param configs: Configs defined in the default.yaml file.
    :param model: Sequence-to-sequence transformer.
    :param opt: Adam optimizer.
    :param train: The dataset used for training.
    :param test: The dataset used for validation.
    :param text_encoder: Torch NLP text encoder for tokenization and vectorization.
    """
    for e in range(configs.get('num_epochs', 8)):
        print(f'\n Epoch {e}')
        model.train()

        nr_batches = math.ceil(len(train) / configs.get('batch_size', 8))
        train_iter, test_iter = get_iterators(configs, train, test)
        total_loss, steps = 0, 0

        for sample in tqdm.tqdm(train_iter, total=nr_batches):
            # 0) Zero out previous grads
            opt.zero_grad()

            # 1) Prepare Sample
            src, src_lengths, trg, shifted_trg, trg_lengths = prepare_sample(
                sample, text_encoder)

            # 2) Run model
            lprobs = model(
                src=src.cuda(),
                trg=shifted_trg.cuda(),
                src_mask=lengths_to_mask(src_lengths).unsqueeze(1).cuda(),
                trg_mask=lengths_to_mask(trg_lengths).unsqueeze(1).cuda())

            # 3) Compute loss
            loss = F.nll_loss(lprobs.transpose(2, 1),
                              trg.cuda(),
                              reduction='mean')
            loss.backward()

            # 4) Update training metrics
            total_loss += float(loss.item())
            steps += int(trg.ne(0).sum())

            # 5) clip gradients
            # - If the gradient norm exceeds the configured threshold, scale it back down to that threshold.
            if configs.get('gradient_clipping', -1) > 0.0:
                nn.utils.clip_grad_norm_(model.parameters(),
                                         configs.get('gradient_clipping'))

            # 6) Optim step
            opt.step()

        print(f'-- total train loss {total_loss:.4}')
        total_steps = steps * (e + 1)
        print(f'-- train steps {total_steps}')
        validate(model, test_iter, text_encoder)
    return model
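
The loop above assumes a lengths_to_mask helper that turns a batch of sequence lengths into a boolean padding mask before it is unsqueezed for attention. The helper is not shown in the snippet; a minimal sketch of the assumed behavior:

import torch

def lengths_to_mask(lengths: torch.Tensor) -> torch.Tensor:
    # lengths: (batch,) tensor of valid sequence lengths.
    # Returns a (batch, max_len) boolean mask, True where a token is valid.
    max_len = int(lengths.max())
    positions = torch.arange(max_len, device=lengths.device)
    return positions.unsqueeze(0) < lengths.unsqueeze(1)
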
Example #3
    def train(
            model: DepressionDetector, optimizer: torch.optim.Adam,
            train_dataset: Dataset, batch_size, num_epochs, writer,
            reduction_loss, tensorboard_batch=100, _shuffle=True, last_epoch_count=0
    ):
        """
        train a LSTMNetwork object
        :param optimizer:
        :param last_epoch_count:
        :param _shuffle:
        :param reduction_loss:
        :param tensorboard_batch:
        :param writer:
        :param batch_size:
        :param train_dataset:
        :param num_epochs:
        :param model:
        :return:
        """
        loss_fn = torch.nn.MSELoss(reduction=reduction_loss).cuda()
        train_data_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=_shuffle)

        model.train()
        running_loss_epoch = 0.0
        running_loss_batches = 0.0
        last_epoch = last_epoch_count + 1 if last_epoch_count != 0 else last_epoch_count

        for epoch in range(0, num_epochs):
            try:
                for batch, data in enumerate(train_data_loader, 0):
                    inputs, labels = data
                    if inputs.shape[0] == batch_size:
                        inputs, labels = inputs.cuda(), labels.cuda()
                        out = model(inputs)
                        loss = loss_fn(out.float(), labels.float())
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()
                        running_loss_batches += loss.item()
                        running_loss_epoch += loss.item()
                        print(f'Epoch {epoch + 1} batch {batch + 1} train loss: {loss.item()}')

                        if batch % tensorboard_batch == tensorboard_batch - 1:
                            writer.add_scalar(f'training loss per {tensorboard_batch} batches',
                                              running_loss_batches / tensorboard_batch,
                                              last_epoch * len(train_data_loader) + batch)
                            running_loss_batches = 0.0

                writer.add_scalar('training loss per epoch', running_loss_epoch, last_epoch)
                running_loss_epoch = 0.0
                last_epoch += 1
            except KeyboardInterrupt:
                return model, optimizer, last_epoch
        return model, optimizer, last_epoch
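
The reduction_loss argument is forwarded directly to torch.nn.MSELoss. As a quick reminder of what the two common settings do (standard PyTorch behavior, independent of this example):

import torch

pred = torch.tensor([1.0, 2.0, 3.0])
target = torch.tensor([1.0, 0.0, 0.0])

mse_mean = torch.nn.MSELoss(reduction='mean')(pred, target)  # (0 + 4 + 9) / 3
mse_sum = torch.nn.MSELoss(reduction='sum')(pred, target)    # 0 + 4 + 9
print(mse_mean.item(), mse_sum.item())  # ~4.33, 13.0
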
Example #4
def train(model: Model, optimizer: torch.optim.Adam, epoch_num, train_loader,
          test_loader, save_dir_best, save_dir_final, device: torch.device):

    train_losses = []

    best_test_auc = 0.0

    for epoch in tqdm(range(epoch_num)):

        model.train()

        for _, (hist_seq, hist_answers, new_seq, target_answers,
                _) in tqdm(enumerate(train_loader)):

            hist_seq, hist_answers, new_seq, target_answers = \
                hist_seq.to(device), hist_answers.to(device), new_seq.to(device), target_answers.to(device)

            # * forward pass
            # (batch_size, seq_len - 1, 1)
            pred = model(hist_seq, hist_answers, new_seq)

            # * compute loss
            loss = model.loss(pred, target_answers.float())

            train_losses.append(loss.item())

            # * backward pass & update
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        epoch_loss = np.sum(train_losses) / len(train_losses)

        model.eval()

        test_auc = evaluate(model, test_loader, device)

        print("epoch {}: train_loss: {}, test_auc: {}".format(
            epoch + 1, epoch_loss, test_auc))

        wandb.log({"train_loss": epoch_loss, "test_auc": test_auc})

        if test_auc > best_test_auc:
            best_test_auc = test_auc
            torch.save(model.state_dict(), save_dir_best)
            print("best_auc: {} at epoch {}".format(best_test_auc, epoch + 1))

    wandb.log({"best_auc": best_test_auc})
    print("best_auc: {}".format(best_test_auc))
    torch.save(model.state_dict(), save_dir_final)

    print("done.")
Example #5
def train_psnr(dataloader: torch.utils.data.DataLoader,
               model: nn.Module,
               criterion: nn.MSELoss,
               optimizer: torch.optim.Adam,
               epoch: int,
               scaler: amp.GradScaler,
               writer: SummaryWriter,
               args: argparse.Namespace):
    batch_time = AverageMeter("Time", ":6.4f")
    losses = AverageMeter("Loss", ":.6f")
    progress = ProgressMeter(num_batches=len(dataloader),
                             meters=[batch_time, losses],
                             prefix=f"Epoch: [{epoch}]")

    # switch to train mode
    model.train()

    end = time.time()
    for i, (lr, hr) in enumerate(dataloader):
        # Move data to the specified device.
        if args.gpu is not None:
            lr = lr.cuda(args.gpu, non_blocking=True)
            hr = hr.cuda(args.gpu, non_blocking=True)

        optimizer.zero_grad()

        with amp.autocast():
            sr = model(lr)
            loss = criterion(sr, hr)

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # measure accuracy and record loss
        losses.update(loss.item(), lr.size(0))

        iters = i + epoch * len(dataloader) + 1
        writer.add_scalar("Train/Loss", loss.item(), iters)

        # Output results every 100 batches.
        if i % 100 == 0:
            progress.display(i)

        # Save image every 300 batches.
        if iters % 300 == 0:
            vutils.save_image(hr.detach(), os.path.join("runs", "hr", f"PSNR_{iters}.bmp"))
            vutils.save_image(sr.detach(), os.path.join("runs", "sr", f"PSNR_{iters}.bmp"))
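
The scaler and autocast calls above follow the standard torch.cuda.amp mixed-precision recipe. Reduced to a single step with dummy data (generic pattern, not tied to this model; requires a CUDA device):

import torch
from torch.cuda import amp

model = torch.nn.Linear(8, 1).cuda()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = torch.nn.MSELoss()
scaler = amp.GradScaler()

lr_batch, hr_batch = torch.randn(4, 8).cuda(), torch.randn(4, 1).cuda()

optimizer.zero_grad()
with amp.autocast():                 # forward pass runs in mixed precision
    loss = criterion(model(lr_batch), hr_batch)
scaler.scale(loss).backward()        # scale the loss to avoid fp16 underflow
scaler.step(optimizer)               # unscales gradients, then calls optimizer.step()
scaler.update()                      # adjusts the scale factor for the next iteration
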
Example #6
def train_loop(configs: dict, model: CTransformer, opt: torch.optim.Adam,
               train: Dataset, test: Dataset, text_encoder: WhitespaceEncoder,
               label_encoder: LabelEncoder) -> CTransformer:
    """
    Main training loop.
    :param configs: Configs defined in the default.yaml file.
    :param model: Transformer Classifier.
    :param opt: Adam optimizer.
    :param train: The dataset used for training.
    :param test: The dataset used for validation.
    :param text_encoder: Torch NLP text encoder for tokenization and vectorization.
    :param label_encoder: Torch NLP label encoder for vectorization of the labels.
    """
    seen = 0
    for e in range(configs.get('num_epochs', 8)):
        print(f'\n Epoch {e}')
        model.train()

        nr_batches = math.ceil(len(train) / configs.get('batch_size', 8))
        train_iter, test_iter = get_iterators(configs, train, test)

        for sample in tqdm.tqdm(train_iter, total=nr_batches):
            # 0) Zero out previous grads
            opt.zero_grad()

            # 1) Prepare Sample
            input_seqs, input_mask, targets = prepare_sample(
                sample, text_encoder, label_encoder,
                configs.get('max_length', 256))

            # 2) Run model
            out = model(input_seqs.cuda(), input_mask.cuda())

            # 3) Compute loss
            loss = F.nll_loss(out, targets.cuda())
            loss.backward()

            # 4) clip gradients
            # - If the gradient norm exceeds the configured threshold, scale it back down to that threshold.
            if configs.get('gradient_clipping', -1) > 0.0:
                nn.utils.clip_grad_norm_(model.parameters(),
                                         configs.get('gradient_clipping'))

            # 5) Optim step
            opt.step()

            # 6) Update number of seen examples...
            seen += input_seqs.size(0)

        validate(model, text_encoder, label_encoder,
                 configs.get('max_length', 256), test_iter)
    return model
Example #7
def train(net: Network, optimizer: torch.optim.Adam,
          train_loader: torch.utils.data.DataLoader, epoch: int):
    net.train()
    for batch_idx, (x, y) in enumerate(train_loader):
        optimizer.zero_grad()
        output = net(x.view(-1, 28 * 28).to(net.device))
        loss = F.nll_loss(output, y.to(net.device))
        loss.backward()
        optimizer.step()
        if batch_idx % 100 == 0:
            print(
                f"Train Epoch: {epoch}, Step: {batch_idx*len(x)}/{len(train_loader.dataset)}, Loss: {loss.item()}"
            )
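
The Network class is not shown; F.nll_loss(output, ...) implies the model returns log-probabilities, and net.device implies the module tracks its own device. A minimal sketch consistent with that usage (an assumption, not the original class):

import torch
import torch.nn as nn
import torch.nn.functional as F

class Network(nn.Module):
    def __init__(self, device: str = "cpu"):
        super().__init__()
        self.device = torch.device(device)
        self.fc1 = nn.Linear(28 * 28, 128)
        self.fc2 = nn.Linear(128, 10)
        self.to(self.device)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        # nll_loss expects log-probabilities, so finish with log_softmax.
        return F.log_softmax(self.fc2(x), dim=1)
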
Example #8
def train_psnr(train_dataloader: torch.utils.data.DataLoader,
               generator: nn.Module, pixel_criterion: nn.MSELoss,
               psnr_optimizer: torch.optim.Adam, epoch: int,
               writer: SummaryWriter,
               args: argparse.Namespace):
    batch_time = AverageMeter("Time", ":6.4f")
    mse_losses = AverageMeter("MSE Loss", ":.6f")
    progress = ProgressMeter(len(train_dataloader), [batch_time, mse_losses],
                             prefix=f"Epoch: [{epoch}]")

    # switch to train mode
    generator.train()

    end = time.time()
    for i, (lr, hr) in enumerate(train_dataloader):
        # Move data to the specified device.
        if args.gpu is not None:
            lr = lr.cuda(args.gpu, non_blocking=True)
            hr = hr.cuda(args.gpu, non_blocking=True)

        generator.zero_grad()

        # Generate a fake high-resolution image from the real low-resolution image.
        sr = generator(lr)
        # Compute the MSE loss between the generated high-resolution image and the real one.
        mse_loss = pixel_criterion(sr, hr)
        mse_loss.backward()
        psnr_optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # measure accuracy and record loss
        mse_losses.update(mse_loss.item(), lr.size(0))

        iters = i + epoch * len(train_dataloader) + 1
        writer.add_scalar("Train/MSE Loss", mse_loss.item(), iters)

        # Output results every 100 batches.
        if i % 100 == 0:
            progress.display(i)

        # Save image every 1000 batches.
        if iters % 1000 == 0:
            vutils.save_image(hr,
                              os.path.join("runs", "hr", f"PSNR_{iters}.bmp"))
            sr = generator(lr)
            vutils.save_image(sr.detach(),
                              os.path.join("runs", "sr", f"PSNR_{iters}.bmp"))
Example #9
    def __save_models(self, gen: Generator, disc: Discriminator,
                      optim_gen: th.optim.Adam, optim_disc: th.optim.Adam):
        # Save discriminator
        th.save(disc.state_dict(),
                join(self.__output_dir, f"disc_{self.__curr_save}.pt"))

        th.save(optim_disc.state_dict(),
                join(self.__output_dir, f"optim_disc_{self.__curr_save}.pt"))

        # save generator
        th.save(gen.state_dict(),
                join(self.__output_dir, f"gen_{self.__curr_save}.pt"))

        th.save(optim_gen.state_dict(),
                join(self.__output_dir, f"optim_gen_{self.__curr_save}.pt"))
Example #10
    def train_segments(self, model, l_loss, m_loss,
                       optimizer: torch.optim.Adam, train_set):
        model.train(mode=True)
        accuracy_classification_sum = 0
        loss_m_sum = 0
        loss_l1_sum = 0
        loss_classification_sum = 0
        batch_count = 0

        for images, segments, labels in train_set:
            labels, segments = model_utils.reduce_to_class_number(
                self.left_class_number, self.right_class_number, labels,
                segments)
            images, labels, segments = self.convert_data_and_label(
                images, labels, segments)
            segments = self.puller(segments)
            optimizer.zero_grad()
            model_classification, model_segmentation = model_utils.wait_while_can_execute(
                model, images)

            classification_loss = l_loss(model_classification, labels)
            segmentation_loss = m_loss(model_segmentation, segments)

            #torch.cuda.empty_cache()
            segmentation_loss.backward()
            optimizer.step()

            output_probability, output_cl, cl_acc = self.calculate_accuracy(
                labels, model_classification, labels.size(0))
            self.save_train_data(labels, output_cl, output_probability)

            # accumulate information
            accuracy_classification_sum += model_utils.scalar(cl_acc.sum())
            loss_m_sum += model_utils.scalar(segmentation_loss.sum())
            loss_l1_sum += 0
            loss_classification_sum += model_utils.scalar(
                classification_loss.sum())
            batch_count += 1
            #self.de_convert_data_and_label(images, labels, segments)
            #torch.cuda.empty_cache()
        model.train(mode=False)
        return (accuracy_classification_sum / (batch_count + p.EPS),
                loss_m_sum / (batch_count + p.EPS),
                loss_l1_sum / (batch_count + p.EPS),
                loss_classification_sum / (batch_count + p.EPS))
Example #11
def _train_step(
    batch_x: torch.Tensor,
    batch_y: torch.Tensor,
    cavity_model_net: CavityModel,
    optimizer: torch.optim.Adam,
    loss_function: torch.nn.CrossEntropyLoss,
) -> (torch.Tensor, float):
    """
    Helper function to take a training step
    """
    cavity_model_net.train()
    optimizer.zero_grad()
    batch_y_pred = cavity_model_net(batch_x)
    loss_batch = loss_function(batch_y_pred, torch.argmax(batch_y, dim=-1))
    loss_batch.backward()
    optimizer.step()
    return (batch_y_pred, loss_batch.detach().cpu().item())
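
Note that the CrossEntropyLoss used here expects class indices as targets, which is why batch_y is reduced with torch.argmax(batch_y, dim=-1); the labels are apparently one-hot encoded. A tiny illustration of that conversion (dummy data, not from the original project):

import torch

batch_y = torch.tensor([[0., 1., 0.],
                        [1., 0., 0.]])          # one-hot labels
class_indices = torch.argmax(batch_y, dim=-1)   # tensor([1, 0])
logits = torch.randn(2, 3)
loss = torch.nn.CrossEntropyLoss()(logits, class_indices)
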
Example #12
def _train_step(batch: torch.Tensor, pointpillars: torch.nn.Module,
                loss_func: torch.nn.Module, optimizer: torch.optim.Adam,
                epoch: int, i: int) -> torch.Tensor:
    """
    Performs a training step
    """

    pil_batch, ind_batch, label_batch, label_mask = batch

    # -> forward pass through network
    preds = pointpillars(pil_batch, ind_batch, label_batch, label_mask)

    loss = loss_func(preds, writer, epoch, i)
    loss.backward()
    optimizer.step()

    del pil_batch, ind_batch, label_batch, preds

    return loss
Example #13
def train_epoch(model: nn.Module, train_loader: DataLoader,
                criterion: nn.CrossEntropyLoss, optimizer: torch.optim.Adam,
                device: torch.device, ration):
    epoch_loss = 0.0

    model.train()

    for data in train_loader:
        optimizer.zero_grad()

        prediction, target = model(data, device=device, ration=ration)

        loss = criterion(prediction, target)
        loss.backward()

        optimizer.step()

        epoch_loss += loss.item()

    return epoch_loss / len(train_loader.dataset)
Example #14
def train_step_from_batch(
    ts_chunks: torch.Tensor,
    targets: torch.Tensor,
    distr_tcn: DistributionalTCN,
    optimizer: torch.optim.Adam,
):
    """
    Arguments
    ----------
    ts_chunks: Mini-batch chunked from the time series
    targets: Corresponding chunk of target values
    distr_tcn: DistributionalTCN
    optimizer: Optimizer containing parameters, learning rate, etc
    """
    distr_outputs = distr_tcn(ts_chunks.float())
    loss = -distr_outputs.log_prob(targets.float())
    loss = loss.mean()
    loss_value = loss.cpu().detach().numpy()
    loss.backward()
    optimizer.step()
    return loss_value
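
The loss above is the negative log-likelihood of the targets under whatever distribution the DistributionalTCN returns. For intuition, the same computation with a plain torch.distributions.Normal standing in for the model output (an illustration, not the actual DistributionalTCN):

import torch
from torch.distributions import Normal

targets = torch.tensor([0.5, 1.5, -0.2])
distr = Normal(loc=torch.zeros(3), scale=torch.ones(3))  # stand-in predicted distribution

nll = -distr.log_prob(targets).mean()   # the training loss computed above
q95 = distr.icdf(torch.tensor(0.95))    # quantile queries, as used later for prediction bands
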
Example #15
    def train_loop(num_of_epoch: int, input_data: torch.autograd.Variable, ground_truth: torch.autograd.Variable,
                   optimizer: torch.optim.Adam, model: torch.nn.Sequential):
        """A simple train loop.

                Args:
                    num_of_epoch (int): Number of epoch.
                    input_data (torch.autograd.Variable): Input data.
                    ground_truth(torch.autograd.Variable):  Ground truth.
                    optimizer (torch.optim.Adam): ADAM optimizer
                    model(torch.nn.Sequential): Neural network model.

        """
        loss_fn = torch.nn.MSELoss(reduction='sum')

        for t in range(num_of_epoch):
            output_pred = model(input_data)
            loss = loss_fn(output_pred, ground_truth)
            optimizer.zero_grad()
            loss.backward()

            optimizer.step()
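
A hedged end-to-end usage sketch for this helper with randomly generated data (assuming train_loop is accessible as a plain function; the model shape and data below are placeholders chosen for illustration):

import torch

model = torch.nn.Sequential(
    torch.nn.Linear(4, 16),
    torch.nn.ReLU(),
    torch.nn.Linear(16, 1),
)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

input_data = torch.randn(64, 4)
ground_truth = torch.randn(64, 1)

train_loop(num_of_epoch=100, input_data=input_data, ground_truth=ground_truth,
           optimizer=optimizer, model=model)
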
Example #16
def train_gan(train_dataloader: torch.utils.data.DataLoader,
              discriminator: nn.Module, generator: nn.Module,
              content_criterion: VGGLoss, adversarial_criterion: nn.BCELoss,
              discriminator_optimizer: torch.optim.Adam,
              generator_optimizer: torch.optim.Adam, epoch: int,
              writer: SummaryWriter, args: argparse.Namespace):
    batch_time = AverageMeter("Time", ":.4f")
    d_losses = AverageMeter("D Loss", ":.6f")
    g_losses = AverageMeter("G Loss", ":.6f")
    content_losses = AverageMeter("Content Loss", ":.4f")
    adversarial_losses = AverageMeter("Adversarial Loss", ":.4f")
    d_hr_values = AverageMeter("D(x)", ":.4f")
    d_sr1_values = AverageMeter("D(SR1)", ":.4f")
    d_sr2_values = AverageMeter("D(SR2)", ":.4f")

    progress = ProgressMeter(len(train_dataloader), [
        batch_time, d_losses, g_losses, content_losses, adversarial_losses,
        d_hr_values, d_sr1_values, d_sr2_values
    ],
                             prefix=f"Epoch: [{epoch}]")

    # switch to train mode
    discriminator.train()
    generator.train()

    end = time.time()
    for i, (lr, hr) in enumerate(train_dataloader):
        # Move data to the specified device.
        if args.gpu is not None:
            lr = lr.cuda(args.gpu, non_blocking=True)
            hr = hr.cuda(args.gpu, non_blocking=True)
        batch_size = lr.size(0)

        # The real sample label is 1, and the generated sample label is 0.
        real_label = torch.full((batch_size, 1), 1,
                                dtype=lr.dtype).cuda(args.gpu,
                                                     non_blocking=True)
        fake_label = torch.full((batch_size, 1), 0,
                                dtype=lr.dtype).cuda(args.gpu,
                                                     non_blocking=True)

        ##############################################
        # (1) Update D network: maximize E(hr)[log(D(hr))] + E(lr)[log(1 - D(G(lr)))]
        ##############################################
        # Set discriminator gradients to zero.
        discriminator.zero_grad()

        real_output = discriminator(hr)
        # Train the discriminator to classify the real sample as real.
        d_loss_real = adversarial_criterion(real_output, real_label)

        # Generate a fake high-resolution image from the real low-resolution image.
        sr = generator(lr)
        fake_output = discriminator(sr.detach())
        # Train the discriminator to classify the generated sample as fake.
        d_loss_fake = adversarial_criterion(fake_output, fake_label)

        # Count all discriminator losses.
        d_loss = (d_loss_real + d_loss_fake) / 2
        d_loss.backward()
        d_hr = real_output.mean().item()
        d_sr1 = fake_output.mean().item()

        # Update discriminator optimizer gradient information.
        discriminator_optimizer.step()

        ##############################################
        # (2) Update G network: content loss + 0.001 * adversarial loss
        ##############################################
        # Set generator gradients to zero.
        generator.zero_grad()

        # Content loss: mean squared error between the VGG19 (36th layer) feature maps of the SR and HR images.
        content_loss = content_criterion(sr, hr.detach())

        fake_output = discriminator(sr)
        # Train the generator so that the discriminator classifies the generated sample as real.
        adversarial_loss = adversarial_criterion(fake_output, real_label)
        g_loss = content_loss + 0.001 * adversarial_loss
        g_loss.backward()
        d_sr2 = fake_output.mean().item()

        # Update generator optimizer gradient information.
        generator_optimizer.step()

        # measure accuracy and record loss
        d_losses.update(d_loss.item(), lr.size(0))
        g_losses.update(g_loss.item(), lr.size(0))
        content_losses.update(content_loss.item(), lr.size(0))
        adversarial_losses.update(adversarial_loss.item(), lr.size(0))
        d_hr_values.update(d_hr, lr.size(0))
        d_sr1_values.update(d_sr1, lr.size(0))
        d_sr2_values.update(d_sr2, lr.size(0))

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        iters = i + epoch * len(train_dataloader) + 1
        writer.add_scalar("Train/D Loss", d_loss.item(), iters)
        writer.add_scalar("Train/G Loss", g_loss.item(), iters)
        writer.add_scalar("Train/Content Loss", content_loss.item(), iters)
        writer.add_scalar("Train/Adversarial Loss", adversarial_loss.item(),
                          iters)
        writer.add_scalar("Train/D(LR)", d_hr, iters)
        writer.add_scalar("Train/D(SR1)", d_sr1, iters)
        writer.add_scalar("Train/D(SR2)", d_sr2, iters)

        # Output results every 100 batches.
        if i % 100 == 0:
            progress.display(i)

        # Save image every 1000 batches.
        if iters % 1000 == 0:
            vutils.save_image(hr, os.path.join("runs", "hr",
                                               f"GAN_{iters}.bmp"))
            sr = generator(lr)
            vutils.save_image(sr.detach(),
                              os.path.join("runs", "sr", f"GAN_{iters}.bmp"))
Example #17
 def fit(self,
         train_dataloader: DataLoader,
         train_len: int,
         epochs: int,
         criterion: nn.CrossEntropyLoss,
         optimizer: torch.optim.Adam,
         verbose=True,
         device="cuda",
         test_dataloader=None,
         test_len=None,
         use_nni=False,
         save_checkpoints=False,
         model_save_threshold=0.85) -> dict:
     self.train()
     results = dict()
     results["train_acc"] = list()
     results["train_loss"] = list()
     results["train_precision"] = list()
     results["train_recall"] = list()
     results["train_f1"] = list()
     if test_dataloader is not None:
         results["test_acc"] = list()
         results["test_loss"] = list()
         results["test_precision"] = list()
         results["test_recall"] = list()
         results["test_f1"] = list()
     self.to(device)
     if verbose:
         print("statring training...")
     for epoch in tqdm.tqdm(
             range(epochs)):  # loop over the dataset multiple times
         running_loss = 0.0
         for i, data in enumerate(
                 tqdm.tqdm(train_dataloader)):  # mini-batch
             self.train()
             inputs, mask, target_mask, labels = data
             outputs = self(inputs, mask, target_mask)
             loss = criterion(outputs, labels)
             loss.backward()
             optimizer.step()
             optimizer.zero_grad()
             # print statistics
             #running_loss += loss.item()
         y_real, y_pred = calc_performance(self, train_dataloader)
         acc, precision, recall, f1 = calc_classification_metrics(
             y_true=y_real, y_pred=y_pred)
         if verbose:
             print(
                 f'\tEp #{epoch} | Train. Loss: {loss:.3f} | Acc: {acc * 100:.2f}% | Precision: {precision * 100:.2f}% | Recall: {recall * 100:.2f}% | F1: {f1 * 100:.2f}%'
             )
         results["train_acc"].append(acc)
         results["train_loss"].append(loss)
         results["train_precision"].append(precision)
         results["train_recall"].append(recall)
         results["train_f1"].append(f1)
         if test_dataloader is not None:
             y_real, y_pred = calc_performance(self, test_dataloader)
             test_acc, precision, recall, test_f1 = calc_classification_metrics(
                 y_true=y_real, y_pred=y_pred)
             if verbose:
                 print(
                     f'\tEp #{epoch} | Dev. '
                     f'Acc: {test_acc * 100:.2f}% | Precision: {precision * 100:.2f}% | Recall: {recall * 100:.2f}% | F1: {test_f1 * 100:.2f}%'
                 )
             results["test_acc"].append(acc)
             results["test_loss"].append(loss)
             results["test_precision"].append(precision)
             results["test_recall"].append(recall)
             results["test_f1"].append(f1)
             if save_checkpoints and model_save_threshold <= acc:
                 model_name = self.generate_model_save_name(acc)
                 model_path = os.path.join("models", model_name)
                 torch.save(self, model_path)
         if use_nni:
             nni.report_intermediate_result({
                 "train_acc": acc,
                 "train_f1": f1,
                 "default": test_acc,
                 "test_f1": test_f1
             })
     if verbose:
         print('Finished Training')
     return results
Example #18
    def train(
        #train_config: TrainingConfiguration, model: nn.Module, optimizer: torch.optim.Optimizer,
        train_config: TrainingConfiguration, model: nn.Module, optimizer: torch.optim.Adam,
        train_loader: torch.utils.data.DataLoader, epoch_idx: int
    ) -> tuple:
        
        # switch model to training mode
        model.train()
        
        # to get batch loss
        batch_loss = np.array([])
        
        # to get batch accuracy
        batch_acc = np.array([])
            
        for batch_idx, (data, target) in enumerate(train_loader):
            
            # clone target
            indx_target = target.clone()
            # send data to device (this is mandatory if a GPU is to be used)
            data = data.to(train_config.device)
            # send target to device
            target = target.to(train_config.device)

            # reset parameters gradient to zero
            optimizer.zero_grad()
            
            # forward pass to the model
            output = model(data)
            
            # cross entropy loss
            loss = F.cross_entropy(output, target)
            
            # find gradients w.r.t training parameters
            loss.backward()
            # Update parameters using gradients
            optimizer.step()
            
            batch_loss = np.append(batch_loss, [loss.item()])
            
            # Score to probability using softmax
            prob = F.softmax(output, dim=1)
                
            # get the index of the max probability
            pred = prob.data.max(dim=1)[1]  
                            
            # correct prediction
            correct = pred.cpu().eq(indx_target).sum()
                
            # accuracy
            acc = float(correct) / float(len(data))
            
            batch_acc = np.append(batch_acc, [acc])

            if batch_idx % train_config.log_interval == 0 and batch_idx > 0:              
                print(
                    'Train Epoch: {} [{}/{}] Loss: {:.6f} Acc: {:.4f}'.format(
                        epoch_idx, batch_idx * len(data), len(train_loader.dataset), loss.item(), acc
                    )
                )
                
        epoch_loss = batch_loss.mean()
        epoch_acc = batch_acc.mean()
        return epoch_loss, epoch_acc
Example #19
def train_gan(dataloader: torch.utils.data.DataLoader,
              discriminator: nn.Module,
              discriminator_optimizer: torch.optim.Adam, generator: nn.Module,
              generator_optimizer: torch.optim.Adam,
              pixel_criterion: nn.L1Loss, content_criterion: VGGLoss,
              adversarial_criterion: nn.BCEWithLogitsLoss, epoch: int,
              scaler: amp.GradScaler, writer: SummaryWriter,
              args: argparse.Namespace):
    batch_time = AverageMeter("Time", ":.4f")
    d_losses = AverageMeter("D Loss", ":.6f")
    g_losses = AverageMeter("G Loss", ":.6f")
    pixel_losses = AverageMeter("Pixel Loss", ":6.4f")
    content_losses = AverageMeter("Content Loss", ":6.4f")
    adversarial_losses = AverageMeter("Adversarial Loss", ":6.4f")

    progress = ProgressMeter(num_batches=len(dataloader),
                             meters=[
                                 batch_time, d_losses, g_losses, pixel_losses,
                                 content_losses, adversarial_losses
                             ],
                             prefix=f"Epoch: [{epoch}]")

    # switch to train mode
    discriminator.train()
    generator.train()

    end = time.time()
    for i, (lr, hr) in enumerate(dataloader):
        # Move data to the specified device.
        if args.gpu is not None:
            lr = lr.cuda(args.gpu, non_blocking=True)
            hr = hr.cuda(args.gpu, non_blocking=True)
        batch_size = lr.size(0)

        # The real sample label is 1, and the generated sample label is 0.
        real_label = torch.full((batch_size, 1), 1,
                                dtype=lr.dtype).cuda(args.gpu,
                                                     non_blocking=True)
        fake_label = torch.full((batch_size, 1), 0,
                                dtype=lr.dtype).cuda(args.gpu,
                                                     non_blocking=True)

        ##############################################
        # (1) Update D network: relativistic average GAN loss,
        #     classifying C(hr) - E[C(sr)] as real and C(sr) - E[C(hr)] as fake.
        ##############################################
        discriminator_optimizer.zero_grad()

        with amp.autocast():
            sr = generator(lr)
            # It makes the discriminator distinguish between real sample and fake sample.
            real_output = discriminator(hr)
            fake_output = discriminator(sr.detach())

            # Adversarial loss for real and fake images (relativistic average GAN)
            d_loss_real = adversarial_criterion(
                real_output - torch.mean(fake_output), real_label)
            d_loss_fake = adversarial_criterion(
                fake_output - torch.mean(real_output), fake_label)

            # Count all discriminator losses.
            d_loss = (d_loss_real + d_loss_fake) / 2

        scaler.scale(d_loss).backward()
        scaler.step(discriminator_optimizer)
        scaler.update()

        ##############################################
        # (2) Update G network: pixel loss + content loss + relativistic adversarial loss
        ##############################################
        generator_optimizer.zero_grad()

        with amp.autocast():
            sr = generator(lr)
            # It makes the discriminator unable to distinguish the real samples and fake samples.
            real_output = discriminator(hr.detach())
            fake_output = discriminator(sr)

            # Calculate the absolute value of pixels with L1 loss.
            pixel_loss = pixel_criterion(sr, hr.detach())
            # The 35th layer in VGG19 is used as the feature extractor by default.
            content_loss = content_criterion(sr, hr.detach())
            # Adversarial loss for real and fake images (relativistic average GAN)
            adversarial_loss = adversarial_criterion(
                fake_output - torch.mean(real_output), real_label)

            # Count all generator losses.
            g_loss = 0.01 * pixel_loss + 1 * content_loss + 0.005 * adversarial_loss

        scaler.scale(g_loss).backward()
        scaler.step(generator_optimizer)
        scaler.update()

        # Set generator gradients to zero.
        generator.zero_grad()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # measure accuracy and record loss
        d_losses.update(d_loss.item(), lr.size(0))
        g_losses.update(g_loss.item(), lr.size(0))
        pixel_losses.update(pixel_loss.item(), lr.size(0))
        content_losses.update(content_loss.item(), lr.size(0))
        adversarial_losses.update(adversarial_loss.item(), lr.size(0))

        iters = i + epoch * len(dataloader) + 1
        writer.add_scalar("Train/D Loss", d_loss.item(), iters)
        writer.add_scalar("Train/G Loss", g_loss.item(), iters)
        writer.add_scalar("Train/Pixel Loss", pixel_loss.item(), iters)
        writer.add_scalar("Train/Content Loss", content_loss.item(), iters)
        writer.add_scalar("Train/Adversarial Loss", adversarial_loss.item(),
                          iters)

        # Output results every 100 batches.
        if i % 100 == 0:
            progress.display(i)

        # Save image every 300 batches.
        if iters % 300 == 0:
            vutils.save_image(hr.detach(),
                              os.path.join("runs", "hr", f"GAN_{iters}.bmp"))
            vutils.save_image(sr.detach(),
                              os.path.join("runs", "sr", f"GAN_{iters}.bmp"))
Example #20
def eval_on_series(
    distr_tcn: DistributionalTCN,
    optimizer: torch.optim.Adam,
    series_tensor: torch.Tensor,
    ts_len: int,
    context_length: int,
    is_train: bool = False,
    return_predictions: bool = False,
    lead_time: int = 1,
):
    """
    Arguments
    ----------
    distr_tcn: DistributionalTCN
    optimizer: Optimizer containing parameters, learning rate, etc
    series_tensor: Time series
    ts_len: Length of time series
    context_length: Number of time steps to input
    is_train: True if time series is training set
    return_predictions: True if to return (loss, predictions), False if to return loss only
    lead_time: Number of time steps to predict ahead
    """
    loss_log = []

    ### Parallelising the training:
    trying_mini_batches = True
    if is_train:
        mini_batch_size = 64
        stride = 1

        window_length = context_length + lead_time

        unfold_layer = torch.nn.Unfold(
            kernel_size=(1, window_length), stride=stride
        )
        fold_layer = torch.nn.Fold(
            kernel_size=(1, window_length),
            stride=stride,
            output_size=(1, series_tensor.shape[-1]),
        )

        ts_windows = (
            unfold_layer(series_tensor.unsqueeze(2))
            .transpose(1, 2)
            .transpose(0, 1)
        )

        numb_mini_batches = ts_windows.shape[0] // mini_batch_size

        if trying_mini_batches:
            batch_indices = np.arange(ts_len - window_length - 1)
            # for i in tqdm(range(numb_mini_batches),  position=0, leave=True):
            for i in range(numb_mini_batches):
                idx = np.random.choice(batch_indices, mini_batch_size)
                batch_indices = np.setdiff1d(batch_indices, idx)

                ts_chunks = ts_windows[idx, :, :-lead_time]
                targets = ts_windows[idx, :, -1]

                loss_log.append(
                    train_step_from_batch(
                        ts_chunks, targets, distr_tcn, optimizer
                    )
                )

        else:
            ts_chunks = ts_windows[:, :, :-lead_time]
            targets = ts_windows[:, :, -1]

            loss_log.append(train_step_from_batch(ts_chunks, targets, distr_tcn, optimizer))

        return loss_log

    if return_predictions:
        predictions = {
            "low_lower": [],
            "lower": [],
            "median": [],
            "upper": [],
            "up_upper": [],
        }

    for i in range(ts_len - context_length - lead_time - 1):
        ts_chunk = series_tensor[:, :, i : i + context_length]
        target = series_tensor[:, :, i + context_length + lead_time - 1]

        distr_output = distr_tcn(ts_chunk.float())

        if return_predictions:
            predictions["lower"].append(distr_output.icdf(torch.tensor(0.05)))
            predictions["median"].append(distr_output.icdf(torch.tensor(0.5)))
            predictions["upper"].append(distr_output.icdf(torch.tensor(0.95)))
            predictions["low_lower"].append(
                distr_output.icdf(torch.tensor(0.01))
            )
            predictions["up_upper"].append(
                distr_output.icdf(torch.tensor(0.99))
            )

        loss = -distr_output.log_prob(target.float())
        loss_value = loss.cpu().detach().numpy()[0]
        loss_log.append(loss_value)

        if is_train:
            loss.backward()
            optimizer.step()

    if return_predictions:
        return loss_log, predictions
    return loss_log
Example #21
def trainModel(model, trainData, validData, optimizer: torch.optim.Adam):
    print(model)
    start_time = time.time()

    def trainEpoch(epoch):
        trainData.shuffle()

        total_loss, total, total_num_correct = 0, 0, 0
        report_loss, report_total, report_num_correct = 0, 0, 0
        for i in range(len(trainData)):
            (batch_docs, batch_docs_len,
             doc_mask), (batch_querys, batch_querys_len,
                         query_mask), batch_answers, candidates = trainData[i]

            model.zero_grad()
            pred_answers, answer_probs = model(batch_docs,
                                               batch_docs_len,
                                               doc_mask,
                                               batch_querys,
                                               batch_querys_len,
                                               query_mask,
                                               answers=batch_answers,
                                               candidates=candidates)

            loss, num_correct = loss_func(batch_answers, pred_answers,
                                          answer_probs)

            loss.backward()
            for parameter in model.parameters():
                parameter.grad.data.clamp_(-5.0, 5.0)
            # update the parameters
            optimizer.step()

            total_in_minibatch = batch_answers.size(0)

            report_loss += loss.item() * total_in_minibatch
            report_num_correct += num_correct
            report_total += total_in_minibatch

            total_loss += loss.item() * total_in_minibatch
            total_num_correct += num_correct
            total += total_in_minibatch
            if i % opt.log_interval == 0:
                print(
                    "Epoch %2d, %5d/%5d; avg loss: %.2f; acc: %6.2f;  %6.0f s elapsed"
                    % (epoch, i + 1, len(trainData), report_loss /
                       report_total, report_num_correct / report_total * 100,
                       time.time() - start_time))

                report_loss = report_total = report_num_correct = 0
            del loss, pred_answers, answer_probs

        return total_loss / total, total_num_correct / total

    for epoch in range(opt.start_epoch, opt.epochs + 1):
        print('')

        #  (1) train for one epoch on the training set
        train_loss, train_acc = trainEpoch(epoch)
        print('Epoch %d:\t average loss: %.2f\t train accuracy: %g' %
              (epoch, train_loss, train_acc * 100))

        #  (2) evaluate on the validation set
        valid_loss, valid_acc = eval(model, validData)
        print('=' * 20)
        print('Evaluating on validation set:')
        print('Validation loss: %.2f' % valid_loss)
        print('Validation accuracy: %g' % (valid_acc * 100))
        print('=' * 20)

        model_state_dict = model.state_dict()
        optimizer_state_dict = optimizer.state_dict()
        #  (3) drop a checkpoint
        checkpoint = {
            'model': model_state_dict,
            'epoch': epoch,
            'optimizer': optimizer_state_dict,
            'opt': opt,
        }
        torch.save(
            checkpoint, 'models/%s_epoch%d_acc_%.2f.pt' %
            (opt.save_model, epoch, 100 * valid_acc))
Example #22
def train_gan(dataloader: torch.utils.data.DataLoader,
              discriminator: nn.Module,
              discriminator_optimizer: torch.optim.Adam,
              generator: nn.Module,
              generator_optimizer: torch.optim.Adam,
              content_criterion: VGGLoss,
              adversarial_criterion: nn.BCELoss,
              epoch: int,
              writer: SummaryWriter,
              args: argparse.Namespace):
    batch_time = AverageMeter("Time", ":.4f")
    d_losses = AverageMeter("D Loss", ":.6f")
    g_losses = AverageMeter("G Loss", ":.6f")
    content_losses = AverageMeter("Content Loss", ":.4f")
    adversarial_losses = AverageMeter("Adversarial Loss", ":.4f")

    progress = ProgressMeter(num_batches=len(dataloader),
                             meters=[batch_time, d_losses, g_losses, content_losses, adversarial_losses],
                             prefix=f"Epoch: [{epoch}]")

    # switch to train mode
    discriminator.train()
    generator.train()

    end = time.time()
    for i, (lr, hr) in enumerate(dataloader):
        # Move data to the specified device.
        if args.gpu is not None:
            lr = lr.cuda(args.gpu, non_blocking=True)
            hr = hr.cuda(args.gpu, non_blocking=True)
        batch_size = lr.size(0)

        # The real sample label is 1, and the generated sample label is 0.
        real_label = torch.full((batch_size, 1), 1, dtype=lr.dtype).cuda(args.gpu, non_blocking=True)
        fake_label = torch.full((batch_size, 1), 0, dtype=lr.dtype).cuda(args.gpu, non_blocking=True)

        ##############################################
        # (1) Update D network: maximize E(hr)[log(D(hr))] + E(lr)[log(1 - D(G(lr)))]
        ##############################################
        discriminator.zero_grad()

        # Generate a fake high-resolution image from the real low-resolution image.
        sr = generator(lr)

        # Adversarial loss for real and fake images (origin GAN)
        d_loss_real = adversarial_criterion(discriminator(hr), real_label)
        d_loss_fake = adversarial_criterion(discriminator(sr.detach()), fake_label)
        # Count all discriminator losses.
        d_loss = d_loss_real + d_loss_fake

        d_loss.backward()
        discriminator_optimizer.step()

        ##############################################
        # (2) Update G network: content loss + 0.001 * adversarial loss
        ##############################################
        generator.zero_grad()

        # The 36th layer in VGG19 is used as the feature extractor by default
        content_loss = content_criterion(sr, hr.detach())
        # Adversarial loss for real and fake images (origin GAN).
        adversarial_loss = adversarial_criterion(discriminator(sr), real_label)
        # Count all generator losses.
        g_loss = content_loss + 0.001 * adversarial_loss

        g_loss.backward()
        generator_optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # measure accuracy and record loss
        d_losses.update(d_loss.item(), lr.size(0))
        g_losses.update(g_loss.item(), lr.size(0))
        content_losses.update(content_loss.item(), lr.size(0))
        adversarial_losses.update(adversarial_loss.item(), lr.size(0))

        iters = i + epoch * len(dataloader) + 1
        writer.add_scalar("Train/D_Loss", d_loss.item(), iters)
        writer.add_scalar("Train/G_Loss", g_loss.item(), iters)
        writer.add_scalar("Train/Content_Loss", content_loss.item(), iters)
        writer.add_scalar("Train/Adversarial_Loss", adversarial_loss.item(), iters)

        # Output results every 100 batches.
        if i % 100 == 0:
            progress.display(i)

    # Validate the model once per epoch.
    sr = generator(base_image)
    vutils.save_image(sr.detach(), os.path.join("runs", f"GAN_epoch_{epoch}.png"))
Example #23
def model_train(model: nn.Module, train_loader: DataLoader,
                optimizer: torch.optim.Adam, num_epochs: int,
                loss_function: Callable[
                    [torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor],
                    float], device: str) -> List[float]:
    '''
    Function for training a given input model.

    Parameters
    ----------
    model : nn.Module
        Model, i.e. a variational autoencoder, which needs to be trained.

    train_loader : DataLoader
        DataLoader of the custom training set, providing training utilities such as mini-batching and shuffling.

    optimizer : torch.optim.Adam
        Adam optimizer used to update the neural network weights by minimizing the calculated loss.

    num_epochs : int
        Number of training epochs of the model.

    loss_function : Callable[[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor], float]
        Custom loss input function for error calculation.

    device : str
        Device on which the computation is performed. Typically it is either cpu or cuda (gpu).


    Returns
    -------
    running_rec_loss : list
        List of the average training loss at the end of each epoch.
    
    '''

    running_rec_loss = []
    loss = 0
    model.train()

    tqdm_bar = tqdm(range(1, num_epochs + 1), desc="epoch [loss: ...]")
    for epoch in tqdm_bar:
        train_loss_averager = make_averager()
        batch_bar = tqdm(train_loader,
                         leave=False,
                         desc='batch',
                         total=len(train_loader))

        for batch in batch_bar:
            batch = batch.float()
            batch = batch.to(device)
            batch_reconstructed, latent_mu, latent_logvar = model(batch)
            loss = loss_function(batch_reconstructed, batch, latent_mu,
                                 latent_logvar)

            # Backpropagation
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            refresh_bar(
                batch_bar,
                f"train batch [loss: {train_loss_averager(loss.item()):.3f}]")

        refresh_bar(tqdm_bar, f"epoch [loss: {train_loss_averager(None):.3f}]")
        running_rec_loss.append(train_loss_averager(None))

    return running_rec_loss
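
The loss_function signature (reconstruction, input, mu, logvar) matches the usual VAE objective of a reconstruction term plus a KL term. A hedged sketch of such a loss under the common Gaussian-prior assumption (not necessarily the loss used by the original code):

import torch
import torch.nn.functional as F

def vae_loss(batch_reconstructed: torch.Tensor, batch: torch.Tensor,
             latent_mu: torch.Tensor, latent_logvar: torch.Tensor) -> torch.Tensor:
    # Reconstruction term: how closely the decoder reproduces the input.
    rec = F.mse_loss(batch_reconstructed, batch, reduction='sum')
    # KL divergence between N(mu, sigma^2) and the standard normal prior.
    kld = -0.5 * torch.sum(1 + latent_logvar - latent_mu.pow(2) - latent_logvar.exp())
    return rec + kld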