Exemple #1
0
def validation(model, valid_loader, criterion):
    """Run one validation pass over ``valid_loader`` and print loss stats.

    Parameters
    ----------
    model : torch.nn.Module
        Network to evaluate; switched to eval mode for the duration.
    valid_loader : DataLoader
        Yields (img, train_mask, tr_mask, tcl_mask, radius_map, sin_map,
        cos_map, meta) batches.
    criterion : callable
        Returns (tr_loss, tcl_loss, sin_loss, cos_loss, radii_loss).
    """
    model.eval()
    losses = AverageMeter()

    # Validation needs no gradients: disabling autograd avoids building
    # graphs and wasting memory (the other validation routine in this
    # file already does this).
    with torch.no_grad():
        for i, (img, train_mask, tr_mask, tcl_mask, radius_map, sin_map,
                cos_map, meta) in enumerate(valid_loader):
            img, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map = to_device(
                img, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map)

            output = model(img)

            tr_loss, tcl_loss, sin_loss, cos_loss, radii_loss = \
                criterion(output, tr_mask, tcl_mask, sin_map, cos_map, radius_map, train_mask)
            loss = tr_loss + tcl_loss + sin_loss + cos_loss + radii_loss
            losses.update(loss.item())

            if cfg.viz and i < cfg.vis_num:
                visualize_network_output(output,
                                         tr_mask,
                                         tcl_mask,
                                         prefix='val_{}'.format(i))

            if i % cfg.display_freq == 0:
                print(
                    'Validation: - Loss: {:.4f} - tr_loss: {:.4f} - tcl_loss: {:.4f} - sin_loss: {:.4f} - cos_loss: {:.4f} - radii_loss: {:.4f}'
                    .format(loss.item(), tr_loss.item(), tcl_loss.item(),
                            sin_loss.item(), cos_loss.item(), radii_loss.item()))

    print('Validation Loss: {}'.format(losses.avg))
Exemple #2
0
def train(model, train_loader, train_data, test_data, val_data, scheduler, optimizer, epoch):
    """Train ``model`` for one epoch; periodically record split accuracies.

    Parameters
    ----------
    model : torch.nn.Module
        Network to train; toggled to eval mode only for accuracy checks.
    train_loader : DataLoader
        Yields batches shaped (batch, 1 + features); column 0 is the label.
    train_data, test_data, val_data : tensors
        Full splits evaluated every ``cfg.save_freq`` epochs.
    scheduler : LR scheduler (only queried for the current LR here).
    optimizer : optimizer performing the weight updates.
    epoch : int, current epoch number.

    Side effects: advances the global ``train_step`` counter and appends to
    the global ``accuracy_tests`` / ``accuracy_trains`` / ``accuracy_vals``.
    """
    global train_step

    global accuracy_tests
    global accuracy_trains
    global accuracy_vals
    losses = AverageMeter(max=100)
    model.train()
    # scheduler.step()
    print('Epoch: {} : LR = {}'.format(epoch, scheduler.get_lr()))
    for i, data in enumerate(train_loader):
        train_step += 1
        data = to_device(data)
        # Skip the ragged final batch so the batch dimension stays fixed.
        if data.shape[0] != cfg.batch_size:
            continue
        output = model(data[:, 1:])
        target = data[:, 0].long()
        loss = F.nll_loss(output, target)

        optimizer.zero_grad()
        loss.backward()

        optimizer.step()
        losses.update(loss.item())
        # NOTE(review): per-batch gc.collect() is costly; presumably works
        # around a memory leak -- confirm before removing.
        gc.collect()

        if i % cfg.display_freq == 0:
            print("({:d} / {:d}), loss: {:.3f}".format(i, len(train_loader), loss.item()))

    if epoch % cfg.save_freq == 0:
        # Evaluate with eval-mode layers and autograd disabled; the
        # original ran these full-split forward passes in train mode with
        # gradients tracked, wasting memory and skewing dropout/BN stats.
        model.eval()
        with torch.no_grad():
            def _split_accuracy(split):
                """Percent accuracy of ``model`` on one full data split."""
                labels = split[:, 0].long()
                pred = model(split[:, 1:]).data.max(1, keepdim=True)[1]
                correct = pred.eq(labels.data.view_as(pred)).cpu().sum()
                return correct * 100.0 / labels.shape[0]

            accuracy_test = _split_accuracy(test_data)
            accuracy_tests.append(round(accuracy_test.item(), 3))

            accuracy_train = _split_accuracy(train_data)
            accuracy_trains.append(round(accuracy_train.item(), 3))

            accuracy_val = _split_accuracy(val_data)
            accuracy_vals.append(round(accuracy_val.item(), 3))
            print("accuracy_train: {}; accuracy_val: {}; accuracy_test: {}"
                  .format(accuracy_train, accuracy_val, accuracy_test))
        model.train()

    # if epoch % cfg.save_freq == 0:
    #     save_model(model, epoch, scheduler.get_lr(), optimizer)

    print('Training Loss: {}'.format(losses.avg))
Exemple #3
0
def validation(model, valid_loader, criterion):
    """Evaluate ``model`` on ``valid_loader`` and print per-batch and
    average losses (total, regression, center dice, region dice)."""
    model.eval()
    with torch.no_grad():
        total_meter = AverageMeter()
        reg_meter = AverageMeter()
        center_meter = AverageMeter()
        region_meter = AverageMeter()

        for step, (img, reg_mask, meta) in enumerate(valid_loader):
            img, reg_mask = to_device(img, reg_mask)

            prediction = model(img)

            l_reg, l_center, l_region = criterion(prediction, reg_mask)
            total = l_reg + l_center + l_region

            # Accumulate each component into its running meter.
            for meter, value in ((total_meter, total),
                                 (reg_meter, l_reg),
                                 (center_meter, l_center),
                                 (region_meter, l_region)):
                meter.update(value.item())

            if cfg.visualization and step % cfg.visualization_frequency == 0:
                visualize_network_output(img, prediction, reg_mask, mode='val')

            print(
                'Validation: - Loss: {:.4f} - Reg_Loss: {:.4f} - Center_Dice_Loss: {:.4f} - Region_Dice_Loss: {:.4f}'
                .format(total.item(), l_reg.item(), l_center.item(),
                        l_region.item()))

        print('Validation Loss: {}'.format(total_meter.avg))
        print('Regression Loss: {}'.format(reg_meter.avg))
        print('Center Dice Loss: {}'.format(center_meter.avg))
        print('Region Dice Loss: {}'.format(region_meter.avg))
Exemple #4
0
def train(model, train_loader, criterion, scheduler, optimizer, epoch):
    """Train for one epoch over ``train_loader``.

    Parameters
    ----------
    model : torch.nn.Module
        Network to train (put into train mode here).
    train_loader : DataLoader
        Yields (img, reg_mask, meta); a batch with ``img is None`` signals
        a data-loading failure and is skipped.
    criterion : callable
        Returns (loss_reg, loss_dice_center, loss_dice_region).
    scheduler : per-iteration LR scheduler.
    optimizer : optimizer performing the weight updates.
    epoch : int, current epoch (logging and checkpoint frequency).
    """
    losses = AverageMeter()
    reg_losses = AverageMeter()
    center_loss = AverageMeter()
    region_loss = AverageMeter()

    model.train()

    print('Epoch: {} : LR = {}'.format(epoch, optimizer.param_groups[0]['lr']))

    for i, (img, reg_mask, meta) in enumerate(train_loader):
        if img is None:
            print("Exception loading data! Preparing loading next batch data!")
            continue

        img, reg_mask = to_device(img, reg_mask)

        output = model(img)
        loss_reg, loss_dice_center, loss_dice_region = criterion(
            output, reg_mask)
        loss = loss_reg + loss_dice_center + loss_dice_region

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Step the LR scheduler AFTER the optimizer update (required
        # ordering since PyTorch 1.1). The original stepped it first,
        # skipping the initial LR and also advancing it on skipped batches.
        scheduler.step()

        losses.update(loss.item())
        reg_losses.update(loss_reg.item())
        center_loss.update(loss_dice_center.item())
        region_loss.update(loss_dice_region.item())

        if cfg.visualization and i % cfg.visualization_frequency == 0:
            visualize_network_output(img, output, reg_mask, mode='train')

        print(
            '[{:d} | {:d}] - Loss: {:.4f} - Reg_Loss: {:.4f} - Center_Dice_Loss: {:.4f} - Region_Dice_Loss: {:.4f} - LR: {:e}'
            .format(i, len(train_loader), loss.item(), loss_reg.item(),
                    loss_dice_center.item(), loss_dice_region.item(),
                    optimizer.param_groups[0]['lr']))

    if epoch % cfg.save_frequency == 0:
        save_model(model, epoch, scheduler.get_lr(), optimizer)
def train(model, train_loader, criterion, scheduler, optimizer, epoch):
    """Train the TextSnake-style model for one epoch.

    Parameters
    ----------
    model : torch.nn.Module, network to train.
    train_loader : DataLoader yielding (img, train_mask, tr_mask, tcl_mask,
        radius_map, sin_map, cos_map, meta) batches.
    criterion : callable returning (tr_loss, tcl_loss, sin_loss, cos_loss,
        radii_loss).
    scheduler : per-iteration LR scheduler; its LR is saved with checkpoints.
    optimizer : optimizer performing the weight updates.
    epoch : int, current epoch (logging / checkpoint frequency).
    """
    start = time.time()
    losses = AverageMeter()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    end = time.time()
    model.train()

    for i, (img, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map,
            meta) in enumerate(train_loader):
        data_time.update(time.time() - end)

        img, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map = to_device(
            img, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map)

        output = model(img)
        tr_loss, tcl_loss, sin_loss, cos_loss, radii_loss = \
            criterion(output, tr_mask, tcl_mask, sin_map, cos_map, radius_map, train_mask)
        loss = tr_loss + tcl_loss + sin_loss + cos_loss + radii_loss

        # backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Step the LR scheduler AFTER the optimizer update (required
        # ordering since PyTorch 1.1); the original stepped it before the
        # backward pass, which skips the initial LR.
        scheduler.step()

        losses.update(loss.item())
        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if cfg.viz and i < cfg.vis_num:
            visualize_network_output(output,
                                     tr_mask,
                                     tcl_mask,
                                     prefix='train_{}'.format(i))

        if i % cfg.display_freq == 0:
            print(
                'Epoch: [ {} ][ {:03d} / {:03d} ] - Loss: {:.4f} - tr_loss: {:.4f} - tcl_loss: {:.4f} - sin_loss: {:.4f} - cos_loss: {:.4f} - radii_loss: {:.4f}'
                .format(epoch, i, len(train_loader), loss.item(),
                        tr_loss.item(), tcl_loss.item(), sin_loss.item(),
                        cos_loss.item(), radii_loss.item()))
    if epoch % cfg.save_freq == 0 and epoch > 0:
        save_model(model, epoch, scheduler.get_lr())

    print('Training Loss: {}'.format(losses.avg))
Exemple #6
0
def train(train_loader,
          model,
          criterion,
          optimizer,
          writer,
          epoch,
          no_cuda=False,
          log_interval=25,
          **kwargs):
    """
    Training routine

    Parameters
    ----------
    :param train_loader : torch.utils.data.DataLoader
        The dataloader of the train set.

    :param model : torch.nn.module
        The network model being used.

    :param criterion : torch.nn.loss
        The loss function used to compute the loss of the model.

    :param optimizer : torch.optim
        The optimizer used to perform the weight update.

    :param writer : tensorboardX.writer.SummaryWriter
        The tensorboard writer object. Used to log values on file for the tensorboard visualization.

    :param epoch : int
        Number of the epoch (for logging purposes).

    :param no_cuda : boolean
        Specifies whether the GPU should be used or not. A value of 'True' means the CPU will be used.

    :param log_interval : int
        Interval limiting the logging of mini-batches. Default value of 25.

    :return:
        None
    """
    multi_run = kwargs['run'] if 'run' in kwargs else None

    # Instantiate the counters
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    data_time = AverageMeter()

    # Switch to train mode (turn on dropout & stuff)
    model.train()

    # Iterate over whole training set
    end = time.time()
    pbar = tqdm(enumerate(train_loader),
                total=len(train_loader),
                unit='batch',
                ncols=150,
                leave=False)
    for batch_idx, data in pbar:
        # A 3-tuple batch carries (inputs, lengths, targets) for
        # variable-length sequence input; a 2-tuple is (inputs, targets).
        is_sequence = len(data) == 3
        if is_sequence:
            in_data, length, target = sort_sequences_desc_order(data)
        else:
            in_data, target = data
        # Measure data loading time
        data_time.update(time.time() - end)

        # Moving data to GPU. non_blocking=True replaces the old
        # `async=True` ('async' became a reserved keyword in Python 3.7,
        # so the original was a SyntaxError on modern interpreters).
        if not no_cuda:
            in_data = in_data.cuda(non_blocking=True)
            target = target.cuda(non_blocking=True)
            length = length.cuda(non_blocking=True) if is_sequence else None

        # Variables are deprecated since PyTorch 0.4; tensors are used
        # directly.
        input_var = in_data
        target_var = target

        # Compute output
        if is_sequence:
            model.zero_grad()
            output = model((input_var, length))
        else:
            output = model(input_var)

        # Compute and record the loss
        loss = criterion(output, target_var)
        losses.update(loss.item(), input_var.size(0))

        # Compute and record the top-1 accuracy
        acc1 = accuracy(output.data, target_var, topk=(1, ))[0]

        top1.update(acc1[0], input_var.size(0))

        # Add loss and accuracy to Tensorboard
        if multi_run is None:
            writer.add_scalar('train/mb_loss', loss.item(),
                              epoch * len(train_loader) + batch_idx)
            writer.add_scalar('train/mb_accuracy',
                              acc1.cpu().numpy(),
                              epoch * len(train_loader) + batch_idx)
        else:
            writer.add_scalar('train/mb_loss_{}'.format(multi_run),
                              loss.item(),
                              epoch * len(train_loader) + batch_idx)
            writer.add_scalar('train/mb_accuracy_{}'.format(multi_run),
                              acc1.cpu().numpy(),
                              epoch * len(train_loader) + batch_idx)

        # Reset gradient
        optimizer.zero_grad()
        # Compute gradients
        loss.backward()
        # Perform a step by updating the weights
        optimizer.step()

        # Measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # Log to console
        if batch_idx % log_interval == 0:
            pbar.set_description('train epoch [{0}][{1}/{2}]\t'.format(
                epoch, batch_idx, len(train_loader)))

            pbar.set_postfix(
                Time='{batch_time.avg:.3f}\t'.format(batch_time=batch_time),
                Loss='{loss.avg:.4f}\t'.format(loss=losses),
                Acc1='{top1.avg:.3f}\t'.format(top1=top1),
                Data='{data_time.avg:.3f}\t'.format(data_time=data_time))
Exemple #7
0
def train(train_loader,
          model,
          criterion,
          optimizer,
          writer,
          epoch,
          no_cuda=False,
          log_interval=25,
          **kwargs):
    """
    Training routine

    Parameters
    ----------
    train_loader : torch.utils.data.DataLoader
        The dataloader of the train set.
    model : torch.nn.module
        The network model being used.
    criterion : torch.nn.loss
        The loss function used to compute the loss of the model.
    optimizer : torch.optim
        The optimizer used to perform the weight update.
    writer : tensorboardX.writer.SummaryWriter
        The tensorboard writer object. Used to log values on file for the tensorboard visualization.
    epoch : int
        Number of the epoch (for logging purposes).
    no_cuda : boolean
        Specifies whether the GPU should be used or not. A value of 'True' means the CPU will be used.
    log_interval : int
        Interval limiting the logging of mini-batches. Default value of 25.

    Returns
    ----------
    top1.avg : float
        Accuracy of the model of the evaluated split
    """
    multi_run = kwargs['run'] if 'run' in kwargs else None

    # Instantiate the counters
    batch_time = AverageMeter()
    loss_meter = AverageMeter()
    acc_meter = AverageMeter()
    data_time = AverageMeter()

    # Switch to train mode (turn on dropout & stuff)
    model.train()

    # Iterate over whole training set
    end = time.time()
    pbar = tqdm(enumerate(train_loader),
                total=len(train_loader),
                unit='batch',
                ncols=150,
                leave=False)
    for batch_idx, (input, target) in pbar:

        # Measure data loading time
        data_time.update(time.time() - end)

        # Moving data to GPU. non_blocking=True replaces the old
        # `async=True` ('async' became a reserved keyword in Python 3.7,
        # so the original was a SyntaxError on modern interpreters).
        if not no_cuda:
            input = input.cuda(non_blocking=True)
            target = target.cuda(non_blocking=True)

        # torch.autograd.Variable is deprecated since PyTorch 0.4 (it is
        # an identity wrapper); tensors are used directly.
        input_var = input
        target_var = target

        # Delegate the forward/backward/update of one mini-batch; the
        # meters are updated in place by the helper.
        acc, loss = train_one_mini_batch(model, criterion, optimizer,
                                         input_var, target_var, loss_meter,
                                         acc_meter)

        # Add loss and accuracy to Tensorboard
        if multi_run is None:
            writer.add_scalar('train/mb_loss', loss.data.item(),
                              epoch * len(train_loader) + batch_idx)
            writer.add_scalar('train/mb_accuracy',
                              acc.cpu().numpy(),
                              epoch * len(train_loader) + batch_idx)
        else:
            writer.add_scalar('train/mb_loss_{}'.format(multi_run),
                              loss.data.item(),
                              epoch * len(train_loader) + batch_idx)
            writer.add_scalar('train/mb_accuracy_{}'.format(multi_run),
                              acc.cpu().numpy(),
                              epoch * len(train_loader) + batch_idx)

        # Measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # Log to console
        if batch_idx % log_interval == 0:
            pbar.set_description('train epoch [{0}][{1}/{2}]\t'.format(
                epoch, batch_idx, len(train_loader)))

            pbar.set_postfix(
                Time='{batch_time.avg:.3f}\t'.format(batch_time=batch_time),
                Loss='{loss.avg:.4f}\t'.format(loss=loss_meter),
                Acc1='{acc_meter.avg:.3f}\t'.format(acc_meter=acc_meter),
                Data='{data_time.avg:.3f}\t'.format(data_time=data_time))

    # Logging the epoch-wise accuracy
    if multi_run is None:
        writer.add_scalar('train/accuracy', acc_meter.avg, epoch)
    else:
        writer.add_scalar('train/accuracy_{}'.format(multi_run), acc_meter.avg,
                          epoch)

    logging.debug(
        'Train epoch[{}]: '
        'Acc@1={acc_meter.avg:.3f}\t'
        'Loss={loss.avg:.4f}\t'
        'Batch time={batch_time.avg:.3f} ({data_time.avg:.3f} to load data)'.
        format(epoch,
               batch_time=batch_time,
               data_time=data_time,
               loss=loss_meter,
               acc_meter=acc_meter))

    return acc_meter.avg
Exemple #8
0
def _evaluate(data_loader,
              model,
              criterion,
              writer,
              epoch,
              logging_label,
              no_cuda=False,
              log_interval=10,
              **kwargs):
    """
    The evaluation routine

    Parameters
    ----------
    :param data_loader : torch.utils.data.DataLoader
        The dataloader of the evaluation set

    :param model : torch.nn.module
        The network model being used

    :param criterion: torch.nn.loss
        The loss function used to compute the loss of the model

    :param writer : tensorboardX.writer.SummaryWriter
        The tensorboard writer object. Used to log values on file for the tensorboard visualization.

    :param epoch : int
        Number of the epoch (for logging purposes)

    :param logging_label : string
        Label for logging purposes. Typically 'test' or 'valid'. Its prepended to the logging output path and messages.

    :param no_cuda : boolean
        Specifies whether the GPU should be used or not. A value of 'True' means the CPU will be used.

    :param log_interval : int
        Interval limiting the logging of mini-batches. Default value of 10.

    :return:
        None
    """
    multi_run = kwargs['run'] if 'run' in kwargs else None

    # Instantiate the counters
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    data_time = AverageMeter()

    # Switch to evaluate mode (turn off dropout & such )
    model.eval()

    # Iterate over whole evaluation set
    end = time.time()

    # Empty lists to store the predictions and target values
    preds = []
    targets = []

    pbar = tqdm(enumerate(data_loader),
                total=len(data_loader),
                unit='batch',
                ncols=150,
                leave=False)
    # Evaluation needs no gradients: torch.no_grad() avoids building
    # autograd graphs (replacing the removed `volatile=True` mechanism
    # this code referenced in comments).
    with torch.no_grad():
        for batch_idx, data in pbar:
            # A 3-tuple batch carries (inputs, lengths, targets) for
            # variable-length sequence input; a 2-tuple is (inputs, targets).
            is_sequence = len(data) == 3
            if is_sequence:
                in_data, length, target = sort_sequences_desc_order(data)
            else:
                in_data, target = data

            # Measure data loading time
            data_time.update(time.time() - end)
            # Moving data to GPU. non_blocking=True replaces the old
            # `async=True` ('async' is a keyword since Python 3.7, so the
            # original was a SyntaxError on modern interpreters).
            if not no_cuda:
                in_data = in_data.cuda(non_blocking=True)
                target = target.cuda(non_blocking=True)
                length = length.cuda(non_blocking=True) if is_sequence else None

            input_var = in_data
            target_var = target

            # Compute output
            if is_sequence:
                model.zero_grad()
                output = model((input_var, length))
                output = output.view(output.size(0), 2)
                target_var = target_var.view(output.size(0))
            else:
                output = model(input_var)

            # Compute and record the loss
            loss = criterion(output, target_var)
            losses.update(loss.item(), input_var.size(0))

            # Compute and record the accuracy
            acc1 = accuracy(output, target_var, topk=(1, ))[0]
            top1.update(acc1[0], target_var.size(0))

            # Collect per-sample argmax predictions and targets for later
            # reporting (plain extend replaces the side-effect list
            # comprehensions of the original).
            preds.extend(np.argmax(item) for item in output.data.cpu().numpy())
            targets.extend(target_var.cpu().numpy())

            # Add loss and accuracy to Tensorboard
            if multi_run is None:
                writer.add_scalar(logging_label + '/mb_loss', loss.item(),
                                  epoch * len(data_loader) + batch_idx)
                writer.add_scalar(logging_label + '/mb_accuracy',
                                  acc1.cpu().numpy(),
                                  epoch * len(data_loader) + batch_idx)
            else:
                writer.add_scalar(logging_label + '/mb_loss_{}'.format(multi_run),
                                  loss.item(),
                                  epoch * len(data_loader) + batch_idx)
                writer.add_scalar(
                    logging_label + '/mb_accuracy_{}'.format(multi_run),
                    acc1.cpu().numpy(),
                    epoch * len(data_loader) + batch_idx)

            # Measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if batch_idx % log_interval == 0:
                pbar.set_description(logging_label +
                                     ' epoch [{0}][{1}/{2}]\t'.format(
                                         epoch, batch_idx, len(data_loader)))

                pbar.set_postfix(
                    Time='{batch_time.avg:.3f}\t'.format(batch_time=batch_time),
                    Loss='{loss.avg:.4f}\t'.format(loss=losses),
                    Acc1='{top1.avg:.3f}\t'.format(top1=top1),
                    Data='{data_time.avg:.3f}\t'.format(data_time=data_time))
Exemple #9
0
def train(model, train_loader, criterion, scheduler, optimizer, epoch,
          summary_writer):
    """Train for one epoch, logging losses and images to a summary writer.

    Parameters
    ----------
    model : network to train (switched to train mode here).
    train_loader : loader yielding (img, train_mask, tr_mask, tcl_mask,
        radius_map, sin_map, cos_map, meta) batches.
    criterion : loss returning (tr_loss, tcl_loss, sin_loss, cos_loss,
        radii_loss); also receives the global iteration counter
        (presumably for loss scheduling -- confirm in the criterion).
    scheduler : LR scheduler; only forwarded to ``save_model`` (its
        ``step()`` call is commented out below).
    optimizer : optimizer performing the weight updates.
    epoch : int, current epoch number (logging / checkpoint frequency).
    summary_writer : TensorBoard-style writer for scalars and images.
    """
    start = time.time()
    losses = AverageMeter()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    end = time.time()
    model.train()
    # Iteration counter shared across epochs; drives summary frequency
    # and is incremented once per batch at the bottom of the loop.
    global total_iter

    for i, (img, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map,
            meta) in enumerate(train_loader):
        data_time.update(time.time() - end)

        img, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map = to_device(
            img, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map)

        output = model(img)
        tr_loss, tcl_loss, sin_loss, cos_loss, radii_loss = \
            criterion(output, tr_mask, tcl_mask, sin_map, cos_map, radius_map, train_mask, total_iter)
        # Total loss is the unweighted sum of the five components.
        loss = tr_loss + tcl_loss + sin_loss + cos_loss + radii_loss

        # backward
        # scheduler.step()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        losses.update(loss.item())

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if cfg.viz and i < cfg.vis_num:
            visualize_network_output(output,
                                     tr_mask,
                                     tcl_mask,
                                     prefix='train_{}'.format(i))

        if i % cfg.display_freq == 0:
            print(
                'Epoch: [ {} ][ {:03d} / {:03d} ] - Loss: {:.4f} - tr_loss: {:.4f} - tcl_loss: {:.4f} - sin_loss: {:.4f} - cos_loss: {:.4f} - radii_loss: {:.4f} - {:.2f}s/step'
                .format(epoch, i, len(train_loader), loss.item(),
                        tr_loss.item(), tcl_loss.item(), sin_loss.item(),
                        cos_loss.item(), radii_loss.item(), batch_time.avg))

        # write summary
        if total_iter % cfg.summary_freq == 0:
            print('Summary in {}'.format(
                os.path.join(cfg.summary_dir, cfg.exp_name)))
            # Channels 0-1 are the text-region logits, 2-3 the text
            # center-line logits; softmax then keep the foreground map.
            tr_pred = output[:, 0:2].softmax(dim=1)[:, 1:2]
            tcl_pred = output[:, 2:4].softmax(dim=1)[:, 1:2]
            summary_writer.add_image('input_image',
                                     vutils.make_grid(img, normalize=True),
                                     total_iter)
            summary_writer.add_image(
                'tr/tr_pred', vutils.make_grid(tr_pred * 255, normalize=True),
                total_iter)
            # Ground-truth masks are masked by train_mask (ignore region)
            # and scaled to image range for display.
            summary_writer.add_image(
                'tr/tr_mask',
                vutils.make_grid(
                    torch.unsqueeze(tr_mask * train_mask, 1) * 255),
                total_iter)
            summary_writer.add_image(
                'tcl/tcl_pred', vutils.make_grid(tcl_pred * 255,
                                                 normalize=True), total_iter)
            summary_writer.add_image(
                'tcl/tcl_mask',
                vutils.make_grid(
                    torch.unsqueeze(tcl_mask * train_mask, 1) * 255),
                total_iter)
            summary_writer.add_scalar('learning_rate',
                                      optimizer.param_groups[0]['lr'],
                                      total_iter)
            summary_writer.add_scalar('model/tr_loss', tr_loss.item(),
                                      total_iter)
            summary_writer.add_scalar('model/tcl_loss', tcl_loss.item(),
                                      total_iter)
            summary_writer.add_scalar('model/sin_loss', sin_loss.item(),
                                      total_iter)
            summary_writer.add_scalar('model/cos_loss', cos_loss.item(),
                                      total_iter)
            summary_writer.add_scalar('model/radii_loss', radii_loss.item(),
                                      total_iter)
            summary_writer.add_scalar('model/loss', loss.item(), total_iter)

        total_iter += 1

    print('Speed: {}s /step, {}s /epoch'.format(batch_time.avg,
                                                time.time() - start))

    if epoch % cfg.save_freq == 0:
        save_model(model, optimizer, scheduler, epoch)

    print('Training Loss: {}'.format(losses.avg))
Exemple #10
0
def _evaluate(data_loader,
              model,
              criterion,
              writer,
              epoch,
              logging_label,
              no_cuda=False,
              log_interval=10,
              **kwargs):
    """
    The evaluation routine

    Parameters
    ----------
    data_loader : torch.utils.data.DataLoader
        The dataloader of the evaluation set
    model : torch.nn.module
        The network model being used
    criterion: torch.nn.loss
        The loss function used to compute the loss of the model
    writer : tensorboardX.writer.SummaryWriter
        The tensorboard writer object. Used to log values on file for the tensorboard visualization.
    epoch : int
        Number of the epoch (for logging purposes)
    logging_label : string
        Label for logging purposes. Typically 'test' or 'valid'. Its prepended to the logging output path and messages.
    no_cuda : boolean
        Specifies whether the GPU should be used or not. A value of 'True' means the CPU will be used.
    log_interval : int
        Interval limiting the logging of mini-batches. Default value of 10.

    Returns
    -------
    jss_epoch : float
        Jaccard similarity score of the model on the evaluated split
    """
    multi_run = kwargs['run'] if 'run' in kwargs else None

    # Instantiate the counters
    batch_time = AverageMeter()
    losses = AverageMeter()
    data_time = AverageMeter()

    # Switch to evaluate mode (turn off dropout & such )
    model.eval()

    # Iterate over whole evaluation set
    end = time.time()

    # Empty lists to store the predictions and target values
    preds = []
    targets = []

    pbar = tqdm(enumerate(data_loader),
                total=len(data_loader),
                unit='batch',
                ncols=150,
                leave=False)
    # Evaluation needs no gradients: torch.no_grad() replaces the removed
    # `volatile=True` Variable flag the original used (a runtime error on
    # PyTorch >= 0.4).
    with torch.no_grad():
        for batch_idx, (input, target) in pbar:

            # Measure data loading time
            data_time.update(time.time() - end)

            # Moving data to GPU. non_blocking=True replaces the old
            # `async=True` ('async' is a keyword since Python 3.7, so the
            # original was a SyntaxError on modern interpreters).
            if not no_cuda:
                input = input.cuda(non_blocking=True)
                target = target.cuda(non_blocking=True)

            input_var = input
            target_var = target

            # Compute output
            output = model(input_var)

            # Compute and record the loss; loss.item() replaces the
            # removed 0-dim indexing `loss.data[0]`.
            loss = criterion(output, target_var)
            losses.update(loss.item(), input.size(0))

            # Apply sigmoid and take everything above a threshold of 0.5
            squashed_output = torch.nn.Sigmoid()(output).data.cpu().numpy()

            # Store results of each minibatch (plain extend replaces the
            # side-effect list comprehensions of the original).
            preds.extend(get_preds_from_minibatch(squashed_output))
            targets.extend(target.cpu().numpy())

            # Add loss to Tensorboard
            if multi_run is None:
                writer.add_scalar(logging_label + '/mb_loss', loss.item(),
                                  epoch * len(data_loader) + batch_idx)
            else:
                writer.add_scalar(logging_label + '/mb_loss_{}'.format(multi_run),
                                  loss.item(),
                                  epoch * len(data_loader) + batch_idx)

            # Measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if batch_idx % log_interval == 0:
                pbar.set_description(logging_label +
                                     ' epoch [{0}][{1}/{2}]\t'.format(
                                         epoch, batch_idx, len(data_loader)))

                pbar.set_postfix(
                    Time='{batch_time.avg:.3f}\t'.format(batch_time=batch_time),
                    Loss='{loss.avg:.4f}\t'.format(loss=losses),
                    Data='{data_time.avg:.3f}\t'.format(data_time=data_time))

    # Generate a classification report for each epoch. Plain `int`
    # replaces `np.int`, which was removed in NumPy 1.24.
    targets = np.array(targets).astype(int)
    preds = np.array(preds).astype(int)
    _log_classification_report(data_loader, epoch, preds, targets, writer)
    jss_epoch = compute_jss(targets, preds)

    # Logging the epoch-wise JSS
    if multi_run is None:
        writer.add_scalar(logging_label + '/loss', losses.avg, epoch)
        writer.add_scalar(logging_label + '/jaccard_similarity', jss_epoch,
                          epoch)
    else:
        writer.add_scalar(logging_label + '/loss_{}'.format(multi_run),
                          losses.avg, epoch)
        writer.add_scalar(
            logging_label + '/jaccard_similarity_{}'.format(multi_run),
            jss_epoch, epoch)

    logging.info(
        _prettyprint_logging_label(logging_label) + ' epoch[{}]: '
        'JSS={jss_epoch:.3f}\t'
        'Loss={loss.avg:.4f}\t'
        'Batch time={batch_time.avg:.3f} ({data_time.avg:.3f} to load data)'.
        format(epoch,
               batch_time=batch_time,
               data_time=data_time,
               loss=losses,
               jss_epoch=jss_epoch))

    return jss_epoch
def train(train_loader, model, criterion, optimizer, observer, observer_criterion, observer_optimizer,
          writer, epoch, no_cuda=False, log_interval=25, **kwargs):
    """
    Training routine with an auxiliary observer network.

    The main model is trained against randomly shuffled labels while the
    observer is trained to predict the true labels from the (detached)
    features of the main model.

    Parameters
    ----------
    train_loader : torch.utils.data.DataLoader
        The dataloader of the train set.
    model : torch.nn.module
        The network model being used.
    criterion : torch.nn.loss
        The loss function used to compute the loss of the model.
    optimizer : torch.optim
        The optimizer used to perform the weight update.
    observer : torch.nn.module
        Auxiliary network trained on the features extracted by `model`.
    observer_criterion : torch.nn.loss
        The loss function used to train the observer.
    observer_optimizer : torch.optim
        The optimizer used to update the observer's weights.
    writer : tensorboardX.writer.SummaryWriter
        The tensorboard writer object. Used to log values on file for the tensorboard visualization.
    epoch : int
        Number of the epoch (for logging purposes).
    no_cuda : boolean
        Specifies whether the GPU should be used or not. A value of 'True' means the CPU will be used.
    log_interval : int
        Interval limiting the logging of mini-batches. Default value of 25.

    Returns
    -------
    float
        Average accuracy of the main model over the whole epoch.
    """
    # 'run' is injected in kwargs at runtime IFF it is a multi-run event
    multi_run = kwargs['run'] if 'run' in kwargs else None

    # Instantiate the counters
    batch_time = AverageMeter()
    loss_meter = AverageMeter()
    observer_loss_meter = AverageMeter()
    acc_meter = AverageMeter()
    observer_acc_meter = AverageMeter()
    data_time = AverageMeter()

    # Switch to train mode (turn on dropout & stuff)
    model.train()

    # Random generator used to produce the shuffled labels fed to the main model
    random_seed = 42
    random1 = np.random.RandomState(random_seed)
    num_classes = observer.module.output_channels

    # Iterate over whole training set
    end = time.time()
    pbar = tqdm(enumerate(train_loader), total=len(train_loader), unit='batch', ncols=150, leave=False)
    for batch_idx, (input, target) in pbar:

        # Measure data loading time
        data_time.update(time.time() - end)

        # Generate the shuffled labels
        random_target = torch.LongTensor(random1.randint(0, num_classes, len(input)))

        # Moving data to GPU. NOTE: 'async=True' is a SyntaxError since
        # Python 3.7 ('async' is a reserved keyword); 'non_blocking' is the
        # supported spelling.
        if not no_cuda:
            input = input.cuda(non_blocking=True)
            target = target.cuda(non_blocking=True)
            random_target = random_target.cuda(non_blocking=True)

        # Convert the input and its labels to Torch Variables
        input_var = torch.autograd.Variable(input)
        random_target_var = torch.autograd.Variable(random_target)

        # Train the main model against the shuffled labels
        acc, loss = train_one_mini_batch(model, criterion, optimizer, input_var, random_target_var, loss_meter, acc_meter)

        # Train the observer on the detached features of the main model,
        # against the true labels
        input_features_var = torch.autograd.Variable(model.module.features.data)
        target_var = torch.autograd.Variable(target)

        observer_acc, observer_loss = train_one_mini_batch(observer, observer_criterion, observer_optimizer,
                                                           input_features_var, target_var,
                                                           observer_loss_meter, observer_acc_meter)

        # Add loss and accuracy to Tensorboard. 'loss.item()' replaces the
        # pre-0.4 'loss.data[0]', which fails on 0-dim tensors.
        global_step = epoch * len(train_loader) + batch_idx
        if multi_run is None:
            writer.add_scalar('train/mb_loss', loss.item(), global_step)
            writer.add_scalar('train/mb_accuracy', acc.cpu().numpy(), global_step)
            writer.add_scalar('train/obs_mb_loss', observer_loss.item(), global_step)
            writer.add_scalar('train/obs_mb_accuracy', observer_acc.cpu().numpy(), global_step)
        else:
            writer.add_scalar('train/mb_loss_{}'.format(multi_run), loss.item(), global_step)
            writer.add_scalar('train/mb_accuracy_{}'.format(multi_run), acc.cpu().numpy(), global_step)
            writer.add_scalar('train/obs_mb_loss_{}'.format(multi_run), observer_loss.item(), global_step)
            writer.add_scalar('train/obs_mb_accuracy_{}'.format(multi_run), observer_acc.cpu().numpy(), global_step)

        # Measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # Log to console
        if batch_idx % log_interval == 0:
            pbar.set_description('train epoch [{0}][{1}/{2}]\t'.format(epoch, batch_idx, len(train_loader)))

            pbar.set_postfix(Time='{batch_time.avg:.3f}\t'.format(batch_time=batch_time),
                             Loss='{loss.avg:.4f}\t'.format(loss=loss_meter),
                             Acc1='{acc_meter.avg:.3f}\t'.format(acc_meter=acc_meter),
                             Data='{data_time.avg:.3f}\t'.format(data_time=data_time))

    # Logging the epoch-wise accuracy
    if multi_run is None:
        writer.add_scalar('train/accuracy', acc_meter.avg, epoch)
        writer.add_scalar('train/obs_accuracy', observer_acc_meter.avg, epoch)
    else:
        writer.add_scalar('train/accuracy_{}'.format(multi_run), acc_meter.avg, epoch)
        writer.add_scalar('train/obs_accuracy_{}'.format(multi_run), observer_acc_meter.avg, epoch)

    logging.debug('Train epoch[{}]: '
                  'Acc@1={acc_meter.avg:.3f}\t'
                  'Loss={loss.avg:.4f}\t'
                  'Batch time={batch_time.avg:.3f} ({data_time.avg:.3f} to load data)'
                  .format(epoch, batch_time=batch_time, data_time=data_time, loss=loss_meter, acc_meter=acc_meter))

    return acc_meter.avg
# ===== Example #12 (scraped-source separator; original snippet score: 0) =====
def validate(val_loader, model, criterion):
    """Run one validation pass of the text-detection model.

    Returns a tuple of floats: (avg loss, text mean acc, kernel mean acc,
    text mean IoU, kernel mean IoU).
    """
    with torch.no_grad():
        model.eval()

        batch_time = AverageMeter()
        data_time = AverageMeter()
        losses = AverageMeter()
        running_metric_text = runningScore(2)
        running_metric_kernel = runningScore(2)

        tic = time.time()
        for batch_idx, (imgs, gt_texts, gt_kernels, training_masks) in enumerate(val_loader):
            data_time.update(time.time() - tic)

            # Move the whole batch onto the GPU
            imgs = Variable(imgs.cuda())
            gt_texts = Variable(gt_texts.cuda())
            gt_kernels = Variable(gt_kernels.cuda())
            training_masks = Variable(training_masks.cuda())

            # Forward pass: channel 0 is the text map, the rest are kernels
            outputs = model(imgs)
            texts = outputs[:, 0, :, :]
            kernels = outputs[:, 1:, :, :]

            # Text loss, restricted to the OHEM-selected pixels
            selected_masks = Variable(ohem_batch(texts, gt_texts, training_masks).cuda())
            loss_text = criterion(texts, gt_texts, selected_masks)

            # Kernel losses are evaluated only where the text prediction is
            # confident AND the training mask allows it
            mask0 = torch.sigmoid(texts).data.cpu().numpy()
            mask1 = training_masks.data.cpu().numpy()
            kernel_mask = ((mask0 > 0.5) & (mask1 > 0.5)).astype('float32')
            selected_masks = Variable(torch.from_numpy(kernel_mask).float().cuda())

            loss_kernels = [
                criterion(kernels[:, i, :, :], gt_kernels[:, i, :, :], selected_masks)
                for i in range(6)
            ]
            loss_kernel = sum(loss_kernels) / len(loss_kernels)

            # Weighted total loss
            loss = 0.7 * loss_text + 0.3 * loss_kernel
            losses.update(loss.item(), imgs.size(0))

            score_text = cal_text_score(texts, gt_texts, training_masks,
                                        running_metric_text)
            score_kernel = cal_kernel_score(kernels, gt_kernels, gt_texts,
                                            training_masks,
                                            running_metric_kernel)

            batch_time.update(time.time() - tic)
            tic = time.time()

            if batch_idx % 5 == 0:
                output_log = '({batch}/{size}) Batch: {bt:.3f}s | TOTAL: {total:.0f}min | ETA: {eta:.0f}min '.format(
                    batch=batch_idx + 1,
                    size=len(val_loader),
                    bt=batch_time.avg,
                    total=batch_time.avg * batch_idx / 60.0,
                    eta=batch_time.avg * (len(val_loader) - batch_idx) / 60.0)
                print(output_log)
                sys.stdout.flush()

    return (float(losses.avg), float(score_text['Mean Acc']),
            float(score_kernel['Mean Acc']), float(score_text['Mean IoU']),
            float(score_kernel['Mean IoU']))
# ===== Example #13 (scraped-source separator; original snippet score: 0) =====
def train(train_loader,
          model,
          criterion,
          optimizer,
          writer,
          epoch,
          no_cuda=False,
          log_interval=25,
          **kwargs):
    """
    Training routine

    Parameters
    ----------
    train_loader : torch.utils.data.DataLoader
        The dataloader of the train set.
    model : torch.nn.module
        The network model being used.
    criterion : torch.nn.loss
        The loss function used to compute the loss of the model.
    optimizer : torch.optim
        The optimizer used to perform the weight update.
    writer : tensorboardX.writer.SummaryWriter
        The tensorboard writer object. Used to log values on file for the tensorboard visualization.
    epoch : int
        Number of the epoch (for logging purposes).
    no_cuda : boolean
        Specifies whether the GPU should be used or not. A value of 'True' means the CPU will be used.
    log_interval : int
        Interval limiting the logging of mini-batches. Default value of 25.

    Returns
    ----------
    jss_epoch : float
        Jaccard similarity score of the model over the whole epoch.
    """
    multi_run = kwargs['run'] if 'run' in kwargs else None

    # Instantiate the counters
    batch_time = AverageMeter()
    loss_meter = AverageMeter()
    jss_meter = AverageMeter()
    data_time = AverageMeter()

    # Switch to train mode (turn on dropout & stuff)
    model.train()

    # Empty lists to store the predictions and target values
    preds = []
    targets = []

    # Iterate over whole training set
    end = time.time()
    pbar = tqdm(enumerate(train_loader),
                total=len(train_loader),
                unit='batch',
                ncols=150,
                leave=False)
    for batch_idx, (input, target) in pbar:

        # Measure data loading time
        data_time.update(time.time() - end)

        # Moving data to GPU. 'async=True' is a SyntaxError since Python 3.7;
        # 'non_blocking' is the supported spelling.
        if not no_cuda:
            input = input.cuda(non_blocking=True)
            target = target.cuda(non_blocking=True)

        # Convert the input and its labels to Torch Variables
        input_var = torch.autograd.Variable(input)
        target_var = torch.autograd.Variable(target)

        jss, loss, target_vals, pred_vals = train_one_mini_batch(
            model, criterion, optimizer, input_var, target_var, loss_meter,
            jss_meter)

        # Store results of each minibatch
        preds.extend(pred_vals)
        targets.extend(target_vals)

        # Add loss to Tensorboard. 'loss.item()' replaces the pre-0.4
        # 'loss.data[0]', which fails on 0-dim tensors.
        if multi_run is None:
            writer.add_scalar('train/mb_loss', loss.item(),
                              epoch * len(train_loader) + batch_idx)
        else:
            writer.add_scalar('train/mb_loss_{}'.format(multi_run),
                              loss.item(),
                              epoch * len(train_loader) + batch_idx)

        # Measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # Log to console
        if batch_idx % log_interval == 0:
            pbar.set_description('train epoch [{0}][{1}/{2}]\t'.format(
                epoch, batch_idx, len(train_loader)))

            pbar.set_postfix(
                Time='{batch_time.avg:.3f}\t'.format(batch_time=batch_time),
                Loss='{loss.avg:.4f}\t'.format(loss=loss_meter),
                Data='{data_time.avg:.3f}\t'.format(data_time=data_time))

    # Generate the epoch-wise JSS. 'np.int' was removed in NumPy 1.24;
    # the builtin 'int' is the documented replacement.
    targets = np.array(targets).astype(int)
    preds = np.array(preds).astype(int)
    jss_epoch = compute_jss(targets, preds)

    # Logging the epoch-wise loss and JSS
    if multi_run is None:
        writer.add_scalar('train/loss', loss_meter.avg, epoch)
        writer.add_scalar('train/jaccard_similarity', jss_epoch, epoch)
    else:
        writer.add_scalar('train/loss_{}'.format(multi_run), loss_meter.avg,
                          epoch)
        writer.add_scalar('train/jaccard_similarity_{}'.format(multi_run),
                          jss_epoch, epoch)

    logging.debug(
        'Train epoch[{}]: '
        'JSS={jss_epoch:.3f}\t'
        'Loss={loss.avg:.4f}\t'
        'Batch time={batch_time.avg:.3f} ({data_time.avg:.3f} to load data)'.
        format(epoch,
               batch_time=batch_time,
               data_time=data_time,
               loss=loss_meter,
               jss_epoch=jss_epoch))

    return jss_epoch
# ===== Example #14 (scraped-source separator; original snippet score: 0) =====
def _evaluate(data_loader,
              model,
              criterion,
              writer,
              epoch,
              logging_label,
              no_cuda=False,
              log_interval=10,
              **kwargs):
    """
    The evaluation routine.

    The loss is computed between the network output and its own input
    (reconstruction / autoencoder-style evaluation); input and output image
    grids of the last mini-batch are logged to tensorboard.

    Parameters
    ----------
    data_loader : torch.utils.data.DataLoader
        The dataloader of the evaluation set
    model : torch.nn.module
        The network model being used
    criterion: torch.nn.loss
        The loss function used to compute the loss of the model
    writer : tensorboardX.writer.SummaryWriter
        The tensorboard writer object. Used to log values on file for the tensorboard visualization.
    epoch : int
        Number of the epoch (for logging purposes)
    logging_label : string
        Label for logging purposes. Typically 'test' or 'valid'. Its prepended to the logging output path and messages.
    no_cuda : boolean
        Specifies whether the GPU should be used or not. A value of 'True' means the CPU will be used.
    log_interval : int
        Interval limiting the logging of mini-batches. Default value of 10.

    Returns
    -------
    losses.avg : float
        Average reconstruction loss over the evaluated split.
    """
    multi_run = kwargs['run'] if 'run' in kwargs else None

    # Instantiate the counters
    batch_time = AverageMeter()
    losses = AverageMeter()
    data_time = AverageMeter()

    # Switch to evaluate mode (turn off dropout & such )
    model.eval()

    # Iterate over whole evaluation set
    end = time.time()

    pbar = tqdm(enumerate(data_loader),
                total=len(data_loader),
                unit='batch',
                ncols=150,
                leave=False)
    # torch.no_grad() replaces the removed 'volatile=True' Variable flag
    with torch.no_grad():
        for batch_idx, (input, _) in pbar:

            # Measure data loading time
            data_time.update(time.time() - end)

            # Moving data to GPU. 'async=True' is a SyntaxError since
            # Python 3.7; 'non_blocking' is the supported spelling.
            if not no_cuda:
                input = input.cuda(non_blocking=True)

            input_var = input

            # Compute output
            output = model(input_var)

            # Compute and record the loss; the target IS the input here
            loss = criterion(output, input_var)
            losses.update(loss.item(), input.size(0))

            # Add loss to Tensorboard
            if multi_run is None:
                writer.add_scalar(logging_label + '/mb_loss', loss.item(),
                                  epoch * len(data_loader) + batch_idx)
            else:
                writer.add_scalar(logging_label + '/mb_loss_{}'.format(multi_run),
                                  loss.item(),
                                  epoch * len(data_loader) + batch_idx)

            # Measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if batch_idx % log_interval == 0:
                pbar.set_description(logging_label +
                                     ' epoch [{0}][{1}/{2}]\t'.format(
                                         epoch, batch_idx, len(data_loader)))

                pbar.set_postfix(
                    Time='{batch_time.avg:.3f}\t'.format(batch_time=batch_time),
                    Loss='{loss.avg:.4f}\t'.format(loss=losses),
                    Data='{data_time.avg:.3f}\t'.format(data_time=data_time))

    # Log the last mini-batch's input and reconstruction as 5x5 image grids
    input_img = torchvision.utils.make_grid(input_var[:25].data.cpu(),
                                            nrow=5,
                                            normalize=False,
                                            scale_each=False).permute(
                                                1, 2, 0).numpy()
    output_img = torchvision.utils.make_grid(output[:25].data.cpu(),
                                             nrow=5,
                                             normalize=False,
                                             scale_each=False).permute(
                                                 1, 2, 0).numpy()
    save_image_and_log_to_tensorboard(writer,
                                      tag=logging_label + '/input_image',
                                      image=input_img)
    save_image_and_log_to_tensorboard(writer,
                                      tag=logging_label + '/output_image',
                                      image=output_img,
                                      global_step=epoch)

    return losses.avg
def train(train_loader,
          model,
          criterion,
          optimizer,
          writer,
          epoch,
          no_cuda,
          log_interval=25,
          **kwargs):
    """
    Training routine for a triplet network (anchor / positive / negative).

    Parameters
    ----------
    train_loader : torch.utils.data.DataLoader
        The dataloader of the train set.
    model : torch.nn.module
        The network model being used.
    criterion : torch.nn.loss
        The loss function used to compute the loss of the model.
    optimizer : torch.optim
        The optimizer used to perform the weight update.
    writer : tensorboardX.writer.SummaryWriter
        The tensorboard writer object. Used to log values on file for the tensorboard visualization.
    epoch : int
        Number of the epoch (for logging purposes).
    no_cuda : boolean
        Specifies whether the GPU should be used or not. A value of 'True' means the CPU will be used.
    log_interval : int
        Interval limiting the logging of mini-batches. Default value of 25.

    Returns
    ----------
    int
        Placeholder 0. In the future this should become the FPR95
    """
    multi_run = kwargs['run'] if 'run' in kwargs else None

    # Instantiate the counters
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()

    # Switch to train mode (turn on dropout & stuff)
    model.train()

    # Iterate over whole training set
    end = time.time()
    pbar = tqdm(enumerate(train_loader),
                total=len(train_loader),
                unit='batch',
                ncols=150,
                leave=False)
    for batch_idx, (data_a, data_p, data_n) in pbar:

        # Detect multi-crop input (5-D: bs x ncrops x c x h x w) BEFORE
        # flattening. BUGFIX: the original re-checked the dimensionality
        # after the view() below, where the tensor is already 4-D, so the
        # crop-averaging branch could never trigger.
        multicrop = len(data_a.size()) == 5
        if multicrop:
            bs, ncrops, c, h, w = data_a.size()

            data_a = data_a.view(-1, c, h, w)
            data_p = data_p.view(-1, c, h, w)
            data_n = data_n.view(-1, c, h, w)

        # Measure data loading time
        data_time.update(time.time() - end)

        # Moving data to GPU
        if not no_cuda:
            data_a, data_p, data_n = data_a.cuda(
                non_blocking=True), data_p.cuda(
                    non_blocking=True), data_n.cuda(non_blocking=True)

        # Compute output
        out_a, out_p, out_n = model(data_a), model(data_p), model(data_n)

        # Average the embeddings over the crops of each sample
        if multicrop:
            out_a = out_a.view(bs, ncrops, -1).mean(1)
            out_p = out_p.view(bs, ncrops, -1).mean(1)
            out_n = out_n.view(bs, ncrops, -1).mean(1)

        # Compute and record the loss
        loss = criterion(out_p, out_a, out_n)

        losses.update(loss.item(), data_a.size(0))

        # Reset gradient
        optimizer.zero_grad()
        # Compute gradients
        loss.backward()
        # Perform a step by updating the weights
        optimizer.step()

        # Log to console
        if batch_idx % log_interval == 0:
            pbar.set_description(
                'Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch, batch_idx * len(data_a), len(train_loader.dataset),
                    100. * batch_idx / len(train_loader), losses.avg))

        # Add mb loss to Tensorboard
        if multi_run is None:
            writer.add_scalar('train/mb_loss', loss.item(),
                              epoch * len(train_loader) + batch_idx)
        else:
            writer.add_scalar('train/mb_loss_{}'.format(multi_run),
                              loss.item(),
                              epoch * len(train_loader) + batch_idx)

        # Measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

    return 0
# ===== Example #16 (scraped-source separator; original snippet score: 0) =====
def _evaluate(data_loader,
              model,
              criterion,
              writer,
              epoch,
              logging_label,
              no_cuda=False,
              log_interval=10,
              **kwargs):
    """
    The evaluation routine.

    Each input image holds the satellite photo and the ground-truth map side
    by side; the two halves are split apart and the loss is computed between
    the model output and the map half.

    Parameters
    ----------
    data_loader : torch.utils.data.DataLoader
        The dataloader of the evaluation set
    model : torch.nn.module
        The network model being used
    criterion: torch.nn.loss
        The loss function used to compute the loss of the model
    writer : tensorboardX.writer.SummaryWriter
        The tensorboard writer object. Used to log values on file for the tensorboard visualization.
    epoch : int
        Number of the epoch (for logging purposes)
    logging_label : string
        Label for logging purposes. Typically 'test' or 'valid'. Its prepended to the logging output path and messages.
    no_cuda : boolean
        Specifies whether the GPU should be used or not. A value of 'True' means the CPU will be used.
    log_interval : int
        Interval limiting the logging of mini-batches. Default value of 10.

    Returns
    -------
    None
        NOTE(review): accuracy computation is commented out (see TODOs
        below), so nothing is returned yet; callers must not rely on the
        return value.
    """
    #TODO All parts computing the accuracy are commented out. It is necessary to
    #TODO implement a 2D softmax and instead of regressing the output class have it
    #TODO work with class labels. Notice that, however, it would be
    #TODO of interest leaving open the possibility to work with soft labels
    #TODO (e.g. the ground truth for pixel X,Y is an array of probabilities instead
    #TODO of an integer.

    multi_run = kwargs['run'] if 'run' in kwargs else None

    # Instantiate the counters
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    data_time = AverageMeter()

    # Switch to evaluate mode (turn off dropout & such )
    model.eval()

    # Iterate over whole evaluation set
    end = time.time()

    # Empty lists to store the predictions and target values
    # (only used by the commented-out accuracy computation above)
    preds = []
    targets = []

    pbar = tqdm(enumerate(data_loader),
                total=len(data_loader),
                unit='batch',
                ncols=150,
                leave=False)
    # Gradients are not needed during evaluation
    with torch.no_grad():
        for batch_idx, (input, _) in pbar:

            # Measure data loading time
            data_time.update(time.time() - end)

            # Moving data to GPU. 'async=True' is a SyntaxError since
            # Python 3.7; 'non_blocking' is the supported spelling.
            if not no_cuda:
                input = input.cuda(non_blocking=True)

            # Split the data into halves to separate the input from the GT
            satel_image, map_image = torch.chunk(input, chunks=2, dim=3)

            input_var = satel_image
            target_var = map_image

            # Compute output
            output = model(input_var)

            # Compute and record the loss
            loss = criterion(output, target_var)
            losses.update(loss.item(), input.size(0))

            # Add loss to Tensorboard. 'loss.item()' replaces the pre-0.4
            # 'loss.data[0]', which fails on 0-dim tensors.
            if multi_run is None:
                writer.add_scalar(logging_label + '/mb_loss', loss.item(),
                                  epoch * len(data_loader) + batch_idx)
            else:
                writer.add_scalar(logging_label + '/mb_loss_{}'.format(multi_run),
                                  loss.item(),
                                  epoch * len(data_loader) + batch_idx)

            # Measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if batch_idx % log_interval == 0:
                pbar.set_description(logging_label +
                                     ' epoch [{0}][{1}/{2}]\t'.format(
                                         epoch, batch_idx, len(data_loader)))

                pbar.set_postfix(
                    Time='{batch_time.avg:.3f}\t'.format(batch_time=batch_time),
                    Loss='{loss.avg:.4f}\t'.format(loss=losses),
                    Data='{data_time.avg:.3f}\t'.format(data_time=data_time))

    # Log the last mini-batch's output / input / target images
    if multi_run is None:
        save_image_and_log_to_tensorboard(writer,
                                          tag=logging_label + '/output',
                                          image=output[:1],
                                          global_step=epoch)
        save_image_and_log_to_tensorboard(writer,
                                          tag=logging_label + '/input',
                                          image=satel_image[:1])
        save_image_and_log_to_tensorboard(writer,
                                          tag=logging_label + '/target',
                                          image=map_image[:1])
    else:
        save_image_and_log_to_tensorboard(writer,
                                          tag=logging_label +
                                          '/output_{}'.format(multi_run),
                                          image=output[:1],
                                          global_step=epoch)
        save_image_and_log_to_tensorboard(writer,
                                          tag=logging_label +
                                          '/input_{}'.format(multi_run),
                                          image=satel_image[:1])
        save_image_and_log_to_tensorboard(writer,
                                          tag=logging_label + '/target',
                                          image=map_image[:1])

    logging.info(
        _prettyprint_logging_label(logging_label) + ' epoch[{}]: '
        # 'Acc@1={top1.avg:.3f}\t'
        'Loss={loss.avg:.4f}\t'
        'Batch time={batch_time.avg:.3f} ({data_time.avg:.3f} to load data)'.
        format(epoch,
               batch_time=batch_time,
               data_time=data_time,
               loss=losses,
               top1=top1))
def validate(val_loader, model, criterion, writer, epoch, class_encodings, no_cuda=False, log_interval=10, **kwargs):
    """
    The evaluation routine for the validation split.

    Parameters
    ----------
    val_loader : torch.utils.data.DataLoader
        The dataloader of the evaluation set
    model : torch.nn.module
        The network model being used
    criterion: torch.nn.loss
        The loss function used to compute the loss of the model
    writer : tensorboardX.writer.SummaryWriter
        The tensorboard writer object. Used to log values on file for the tensorboard visualization.
    epoch : int
        Number of the epoch (for logging purposes)
    class_encodings : List
        Contains the classes (range of ints)
    no_cuda : boolean
        Specifies whether the GPU should be used or not. A value of 'True' means the CPU will be used.
    log_interval : int
        Interval limiting the logging of mini-batches. Default value of 10.

    Returns
    -------
    meanIU.avg : float
        MeanIU of the model of the evaluated split
    """
    # 'run' is injected in kwargs at runtime IFF it is a multi-run event
    multi_run = kwargs.get('run')

    num_classes = len(class_encodings)

    # Counters for timing, loss and segmentation quality
    batch_time = AverageMeter()
    losses = AverageMeter()
    meanIU = AverageMeter()
    data_time = AverageMeter()

    # Suffix appended to every tensorboard tag on multi-run events
    tag_suffix = '' if multi_run is None else '_{}'.format(multi_run)

    # Turn off dropout & co.
    model.eval()

    end = time.time()

    pbar = tqdm(enumerate(val_loader), total=len(val_loader), unit='batch', ncols=150, leave=False)
    for batch_idx, (input, target) in pbar:
        # Measure data loading time
        data_time.update(time.time() - end)

        # Moving data to GPU
        if not no_cuda:
            input = input.cuda(non_blocking=True)
            target = target.cuda(non_blocking=True)

        # Forward pass
        output = model(input)

        # Record the loss
        loss = criterion(output, target)
        losses.update(loss.item(), input.size(0))

        # Record the mean IoU of this mini-batch
        _, _, mean_iu_batch, _ = accuracy_segmentation(target.cpu().numpy(), get_argmax(output), num_classes)
        meanIU.update(mean_iu_batch, input.size(0))

        # Mini-batch loss and meanIU onto tensorboard
        step = epoch * len(val_loader) + batch_idx
        writer.add_scalar('val/mb_loss' + tag_suffix, loss.item(), step)
        writer.add_scalar('val/mb_meanIU' + tag_suffix, mean_iu_batch, step)

        # Measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if batch_idx % log_interval == 0:
            pbar.set_description('val epoch [{0}][{1}/{2}]\t'.format(epoch, batch_idx, len(val_loader)))

            pbar.set_postfix(Time='{:.3f}\t'.format(batch_time.avg),
                             Loss='{:.4f}\t'.format(losses.avg),
                             meanIU='{:.3f}\t'.format(meanIU.avg),
                             Data='{:.3f}\t'.format(data_time.avg))

    # Logging the epoch-wise meanIU
    writer.add_scalar('val/meanIU' + tag_suffix, meanIU.avg, epoch)

    logging.info(_prettyprint_logging_label("val") +
                 ' epoch[{}]: '
                 'MeanIU={:.3f}\t'
                 'Loss={:.4f}\t'
                 'Batch time={:.3f} ({:.3f} to load data)'
                 .format(epoch, meanIU.avg, losses.avg, batch_time.avg, data_time.avg))

    return meanIU.avg
def test(test_loader, model, criterion, writer, epoch, class_encodings, img_names_sizes_dict, dataset_folder,
         post_process, no_cuda=False, log_interval=10, **kwargs):
    """
    The evaluation routine

    Parameters
    ----------
    test_loader : torch.utils.data.DataLoader
        The dataloader of the evaluation set
    model : torch.nn.module
        The network model being used
    criterion : torch.nn.loss
        The loss function used to compute the loss of the model
    writer : tensorboardX.writer.SummaryWriter
        The tensorboard writer object. Used to log values on file for the tensorboard visualization.
    epoch : int
        Number of the epoch (for logging purposes)
    class_encodings : List
        Contains the range of encoded classes
    img_names_sizes_dict : dictionary {str: (int, int)}
        Key: gt image name (with extension), Value: image size
    dataset_folder : str
        Location of the dataset on the file system (forwarded to process_full_image)
    post_process : Boolean
        apply post-processing to the output of the network
    no_cuda : boolean
        Specifies whether the GPU should be used or not. A value of 'True' means the CPU will be used.
    log_interval : int
        Interval limiting the logging of mini-batches. Default value of 10.

    Returns
    -------
    meanIU.avg : float
        MeanIU of the model of the evaluated split
    """
    # 'Run' is injected in kwargs at runtime IFF it is a multi-run event
    multi_run = kwargs['run'] if 'run' in kwargs else None

    num_classes = len(class_encodings)

    # Instantiate the counters
    batch_time = AverageMeter()
    losses = AverageMeter()
    meanIU = AverageMeter()
    data_time = AverageMeter()

    # Switch to evaluate mode (turn off dropout & such )
    model.eval()

    # Iterate over whole evaluation set
    end = time.time()

    # Need to store the images currently being processed
    canvas = {}

    pbar = tqdm(enumerate(test_loader), total=len(test_loader), unit='batch', ncols=150, leave=False)
    for batch_idx, (input, target) in pbar:
        # Unpack input: each batch carries the patch tensor plus the patch
        # locations and the name of the source image they were cut from
        input, top_left_coordinates, test_img_names = input

        # Measure data loading time
        data_time.update(time.time() - end)

        # Moving data to GPU
        if not no_cuda:
            input = input.cuda(non_blocking=True)
            target = target.cuda(non_blocking=True)

        # Compute output
        output = model(input)

        # Compute and record the loss
        loss = criterion(output, target)
        losses.update(loss.item(), input.size(0))

        # Compute and record the batch meanIU
        _, _, mean_iu_batch, _ = accuracy_segmentation(target.cpu().numpy(), get_argmax(output), num_classes)

        # Add loss and meanIU to Tensorboard
        scalar_label = 'test/mb_loss' if multi_run is None else 'test/mb_loss_{}'.format(multi_run)
        writer.add_scalar(scalar_label, loss.item(), epoch * len(test_loader) + batch_idx)
        scalar_label = 'test/mb_meanIU' if multi_run is None else 'test/mb_meanIU_{}'.format(multi_run)
        writer.add_scalar(scalar_label, mean_iu_batch, epoch * len(test_loader) + batch_idx)

        # Measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if batch_idx % log_interval == 0:
            pbar.set_description('test epoch [{0}][{1}/{2}]\t'.format(epoch, batch_idx, len(test_loader)))
            pbar.set_postfix(Time='{batch_time.avg:.3f}\t'.format(batch_time=batch_time),
                             Loss='{loss.avg:.4f}\t'.format(loss=losses),
                             meanIU='{meanIU.avg:.3f}\t'.format(meanIU=meanIU),
                             Data='{data_time.avg:.3f}\t'.format(data_time=data_time))

        # Output needs to be patched together to form the complete output of the full image
        # patches are returned as a sliding window over the full image, overlapping sections are averaged
        for patch, x, y, img_name in zip(output.data.cpu().numpy(), top_left_coordinates[0].numpy(), top_left_coordinates[1].numpy(), test_img_names):

            # Is a new image?
            if img_name not in canvas:
                # Create a new image of the right size filled with NaNs
                canvas[img_name] = np.empty((num_classes, *img_names_sizes_dict[img_name]))
                canvas[img_name].fill(np.nan)

            # Add the patch to the image
            canvas[img_name] = merge_patches(patch, (x, y), canvas[img_name])

            # Save the image when done (no NaNs left means every pixel was covered)
            if not np.isnan(np.sum(canvas[img_name])):
                # Save the final image
                mean_iu = process_full_image(img_name, canvas[img_name], multi_run, dataset_folder, class_encodings, post_process)
                # Update the meanIU
                meanIU.update(mean_iu, 1)
                # Remove the entry
                canvas.pop(img_name)
                logging.info("\nProcessed image {} with mean IU={}".format(img_name, mean_iu))

    # Canvas MUST be empty or something was wrong with coverage of all images
    assert len(canvas) == 0

    # Logging the epoch-wise meanIU
    # BUGFIX: the original wrote this under 'test/mb_meanIU', clobbering the
    # mini-batch series above; use 'test/meanIU' like the val routine does.
    scalar_label = 'test/meanIU' if multi_run is None else 'test/meanIU_{}'.format(multi_run)
    writer.add_scalar(scalar_label, meanIU.avg, epoch)

    logging.info(_prettyprint_logging_label("test") +
                 ' epoch[{}]: '
                 'MeanIU={meanIU.avg:.3f}\t'
                 'Loss={loss.avg:.4f}\t'
                 'Batch time={batch_time.avg:.3f} ({data_time.avg:.3f} to load data)'
                 .format(epoch, batch_time=batch_time, data_time=data_time, loss=losses, meanIU=meanIU))

    return meanIU.avg
def validation(model, valid_loader, criterion, epoch, logger):
    """Run one full pass over the validation set and log the averaged losses.

    The total loss is the sum of the five component losses returned by
    ``criterion``; per-component running averages are written to ``logger``
    under the 'val' tag, indexed by epoch.
    """
    with torch.no_grad():
        model.eval()

        # One running-average meter per loss component (plus the total).
        meters = {name: AverageMeter()
                  for name in ('loss', 'tr_loss', 'tcl_loss',
                               'sin_loss', 'cos_loss', 'radii_loss')}

        for step, (img, train_mask, tr_mask, tcl_mask, radius_map, sin_map,
                   cos_map, meta) in enumerate(valid_loader):

            (img, train_mask, tr_mask, tcl_mask,
             radius_map, sin_map, cos_map) = to_device(
                img, train_mask, tr_mask, tcl_mask, radius_map, sin_map,
                cos_map)

            # output is the model's 7-channel score map; the masks/maps
            # passed to the criterion are the supervision targets.
            output = model(img)
            tr_loss, tcl_loss, sin_loss, cos_loss, radii_loss = criterion(
                output, tr_mask, tcl_mask, sin_map, cos_map, radius_map,
                train_mask)
            loss = tr_loss + tcl_loss + sin_loss + cos_loss + radii_loss

            # update the running averages
            for name, value in (('loss', loss), ('tr_loss', tr_loss),
                                ('tcl_loss', tcl_loss), ('sin_loss', sin_loss),
                                ('cos_loss', cos_loss),
                                ('radii_loss', radii_loss)):
                meters[name].update(value.item())

            if cfg.viz and step % cfg.viz_freq == 0:
                visualize_network_output(output, tr_mask, tcl_mask, mode='val')

            if step % cfg.display_freq == 0:
                print(
                    'Validation: - Loss: {:.4f} - tr_loss: {:.4f} - tcl_loss: {:.4f} - sin_loss: {:.4f} - cos_loss: {:.4f} - radii_loss: {:.4f}'
                    .format(loss.item(), tr_loss.item(), tcl_loss.item(),
                            sin_loss.item(), cos_loss.item(),
                            radii_loss.item()))

        # Epoch-level scalars for the experiment logger.
        logger.write_scalars(
            {name: meter.avg for name, meter in meters.items()},
            tag='val',
            n_iter=epoch)

        print('Validation Loss: {}'.format(meters['loss'].avg))
def train(train_loader,
          model,
          criterion,
          optimizer,
          writer,
          epoch,
          class_encodings,
          no_cuda=False,
          log_interval=25,
          **kwargs):
    """
    Training routine

    Parameters
    ----------
    train_loader : torch.utils.data.DataLoader
        The dataloader of the train set.
    model : torch.nn.module
        The network model being used.
    criterion : torch.nn.loss
        The loss function used to compute the loss of the model.
    optimizer : torch.optim
        The optimizer used to perform the weight update.
    writer : tensorboardX.writer.SummaryWriter
        The tensorboard writer object. Used to log values on file for the tensorboard visualization.
    epoch : int
        Number of the epoch (for logging purposes).
    class_encodings : List
        Contains the range of encoded classes.
    no_cuda : boolean
        Specifies whether the GPU should be used or not. A value of 'True' means the CPU will be used.
    log_interval : int
        Interval limiting the logging of mini-batches. Default value of 25.

    Returns
    ----------
    meanIU.avg : float
        meanIU of the model of the evaluated split
    """
    # 'run' is injected in kwargs at runtime IFF it is a multi-run event
    multi_run = kwargs['run'] if 'run' in kwargs else None
    num_classes = len(class_encodings)

    # Instantiate the counters
    batch_time = AverageMeter()
    loss_meter = AverageMeter()
    meanIU = AverageMeter()
    data_time = AverageMeter()

    # Switch to train mode (turn on dropout & stuff)
    model.train()

    # Iterate over whole training set
    end = time.time()
    pbar = tqdm(enumerate(train_loader),
                total=len(train_loader),
                unit='batch',
                ncols=150,
                leave=False)
    for batch_idx, (input, target) in pbar:
        # Measure data loading time
        data_time.update(time.time() - end)

        # Moving data to GPU
        if not no_cuda:
            input = input.cuda(non_blocking=True)
            target = target.cuda(non_blocking=True)

        # Forward/backward/update and meter bookkeeping happen in the helper
        mean_iu, loss = train_one_mini_batch(model, criterion, optimizer,
                                             input, target, loss_meter, meanIU,
                                             num_classes)

        # Add loss and meanIU to Tensorboard (same label scheme as the
        # evaluation routines)
        scalar_label = 'train/mb_loss' if multi_run is None else 'train/mb_loss_{}'.format(multi_run)
        writer.add_scalar(scalar_label, loss.item(),
                          epoch * len(train_loader) + batch_idx)
        scalar_label = 'train/mb_meanIU' if multi_run is None else 'train/mb_meanIU_{}'.format(multi_run)
        writer.add_scalar(scalar_label, mean_iu,
                          epoch * len(train_loader) + batch_idx)

        # Measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        # Log to console
        if batch_idx % log_interval == 0:
            pbar.set_description('train epoch [{0}][{1}/{2}]\t'.format(
                epoch, batch_idx, len(train_loader)))

            pbar.set_postfix(
                Time='{batch_time.avg:.3f}\t'.format(batch_time=batch_time),
                Loss='{loss.avg:.4f}\t'.format(loss=loss_meter),
                meanIU='{meanIU.avg:.3f}\t'.format(meanIU=meanIU),
                Data='{data_time.avg:.3f}\t'.format(data_time=data_time))

    # Logging the epoch-wise meanIU
    scalar_label = 'train/meanIU' if multi_run is None else 'train/meanIU_{}'.format(multi_run)
    writer.add_scalar(scalar_label, meanIU.avg, epoch)

    logging.debug(
        'Train epoch[{}]: '
        'MeanIU={meanIU.avg:.3f}\t'
        'Loss={loss.avg:.4f}\t'
        'Batch time={batch_time.avg:.3f} ({data_time.avg:.3f} to load data)'.
        format(epoch,
               batch_time=batch_time,
               data_time=data_time,
               loss=loss_meter,
               meanIU=meanIU))

    return meanIU.avg
def train(model, train_loader, criterion, scheduler, optimizer, epoch, logger):
    """Train the text-detection model for one epoch.

    The total loss is the sum of the five component losses returned by
    ``criterion``; scalars are written to ``logger`` every ``cfg.log_freq``
    iterations and a checkpoint is saved every ``cfg.save_freq`` epochs.
    """
    global train_step

    losses = AverageMeter()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    end = time.time()
    model.train()
    scheduler.step()

    # BUGFIX: the original printed an undefined name `lr` (NameError on the
    # first call); ask the scheduler for the current learning rate instead.
    print('Epoch: {} : LR = {}'.format(epoch, scheduler.get_lr()))

    for i, (img, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map,
            meta) in enumerate(train_loader):
        data_time.update(time.time() - end)

        train_step += 1

        img, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map = to_device(
            img, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map)
        # Forward pass; the masks/maps are the supervision targets
        output = model(img)
        # Per-component losses
        tr_loss, tcl_loss, sin_loss, cos_loss, radii_loss = \
            criterion(output, tr_mask, tcl_mask, sin_map, cos_map, radius_map, train_mask)
        loss = tr_loss + tcl_loss + sin_loss + cos_loss + radii_loss

        # Backward pass: clear stale gradients, backprop, apply the update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Update the running loss average
        losses.update(loss.item())
        # Measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if cfg.viz and i % cfg.viz_freq == 0:
            visualize_network_output(output, tr_mask, tcl_mask, mode='train')

        if i % cfg.display_freq == 0:
            print(
                '({:d} / {:d}) - Loss: {:.4f} - tr_loss: {:.4f} - tcl_loss: {:.4f} - sin_loss: {:.4f} - cos_loss: {:.4f} - radii_loss: {:.4f}'
                .format(i, len(train_loader), loss.item(), tr_loss.item(),
                        tcl_loss.item(), sin_loss.item(), cos_loss.item(),
                        radii_loss.item()))

        if i % cfg.log_freq == 0:
            logger.write_scalars(
                {
                    'loss': loss.item(),
                    'tr_loss': tr_loss.item(),
                    'tcl_loss': tcl_loss.item(),
                    'sin_loss': sin_loss.item(),
                    'cos_loss': cos_loss.item(),
                    'radii_loss': radii_loss.item()
                },
                tag='train',
                n_iter=train_step)

    if epoch % cfg.save_freq == 0:
        save_model(model, epoch, scheduler.get_lr(), optimizer)

    print('Training Loss: {}'.format(losses.avg))
Exemple #22
0
    def train(self, model, train_loader, criterion, scheduler, optimizer,
              epoch, logger, train_step):
        """Train the model for one epoch and return the updated step counter.

        The total loss is the sum of the five component losses returned by
        ``criterion``. Scalars are written to ``logger`` every ``cfg.log_freq``
        iterations and a checkpoint is saved every ``cfg.save_freq`` epochs.
        """
        losses = AverageMeter()
        batch_time = AverageMeter()
        data_time = AverageMeter()
        end = time.time()
        model.train()
        scheduler.step()
        lr = scheduler.get_lr()[0]
        print('Epoch: {} : LR = {}'.format(epoch, lr))

        for i, (img, train_mask, tr_mask, tcl_mask, radius_map, sin_map,
                cos_map, meta) in enumerate(train_loader):
            data_time.update(time.time() - end)

            train_step += 1

            img, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map = to_device(
                img, train_mask, tr_mask, tcl_mask, radius_map, sin_map,
                cos_map)

            output = model(img)
            tr_loss, tcl_loss, sin_loss, cos_loss, radii_loss = \
                criterion(output, tr_mask, tcl_mask, sin_map, cos_map, radius_map, train_mask)
            loss = tr_loss + tcl_loss + sin_loss + cos_loss + radii_loss

            # backward
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            losses.update(loss.item())
            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if cfg.viz and i % cfg.viz_freq == 0:
                visualize_network_output(output,
                                         tr_mask,
                                         tcl_mask,
                                         mode='train')

            if i % cfg.display_freq == 0:
                # NOTE(review): the fallback presumably guards against a
                # component loss where .item() fails — confirm which case.
                try:
                    print(
                        '({:d} / {:d}) - Loss: {:.4f} - tr_loss: {:.4f} - tcl_loss: {:.4f} - sin_loss: {:.4f} - cos_loss: {:.4f} - radii_loss: {:.4f}'
                        .format(i, len(train_loader), loss.item(),
                                tr_loss.item(), tcl_loss.item(),
                                sin_loss.item(), cos_loss.item(),
                                radii_loss.item()))
                except Exception:  # was a bare except; don't mask KeyboardInterrupt/SystemExit
                    print('({:d} / {:d}) - Loss: {:.4f} - tr_loss: {:.4f}'.
                          format(i, len(train_loader), loss.item(),
                                 tr_loss.item()))

            if i % cfg.log_freq == 0:
                try:
                    logger.write_scalars(
                        {
                            'loss': loss.item(),
                            'tr_loss': tr_loss.item(),
                            'tcl_loss': tcl_loss.item(),
                            'sin_loss': sin_loss.item(),
                            'cos_loss': cos_loss.item(),
                            'radii_loss': radii_loss.item()
                        },
                        tag='train',
                        n_iter=train_step)
                except Exception:  # was a bare except; narrowed (see above)
                    logger.write_scalars(
                        {
                            'loss': loss.item(),
                            'tr_loss': tr_loss.item()
                        },
                        tag='train',
                        n_iter=train_step)

        if epoch % cfg.save_freq == 0:
            self.save_model(model, epoch, scheduler.get_lr(), optimizer)

        print('Training Loss: {}'.format(losses.avg))

        return train_step
Exemple #23
0
def _evaluate(data_loader,
              model,
              criterion,
              writer,
              epoch,
              logging_label,
              no_cuda=False,
              log_interval=10,
              **kwargs):
    """
    The evaluation routine

    Parameters
    ----------
    data_loader : torch.utils.data.DataLoader
        The dataloader of the evaluation set
    model : torch.nn.module
        The network model being used
    criterion : torch.nn.loss
        The loss function used to compute the loss of the model
    writer : tensorboardX.writer.SummaryWriter
        The tensorboard writer object. Used to log values on file for the tensorboard visualization.
    epoch : int
        Number of the epoch (for logging purposes)
    logging_label : string
        Label for logging purposes. Typically 'test' or 'valid'. Its prepended to the logging output path and messages.
    no_cuda : boolean
        Specifies whether the GPU should be used or not. A value of 'True' means the CPU will be used.
    log_interval : int
        Interval limiting the logging of mini-batches. Default value of 10.

    Returns
    -------
    top1.avg : float
        Accuracy of the model of the evaluated split
    """
    # 'run' is injected in kwargs at runtime IFF it is a multi-run event
    multi_run = kwargs['run'] if 'run' in kwargs else None

    # Instantiate the counters
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    data_time = AverageMeter()

    # Switch to evaluate mode (turn off dropout & such )
    model.eval()

    # Iterate over whole evaluation set
    end = time.time()

    # Empty lists to store the predictions and target values
    preds = []
    targets = []
    # BUGFIX: the original reused `multi_run` as the "input has n-crops" flag,
    # clobbering the run number and forcing every `multi_run is None` check
    # onto the multi-run branch. Use a dedicated flag instead.
    is_multicrop = False

    pbar = tqdm(enumerate(data_loader),
                total=len(data_loader),
                unit='batch',
                ncols=150,
                leave=False)
    for batch_idx, (input, target) in pbar:
        # 5-D input means each sample comes as several crops:
        # [bs, ncrops, c, h, w]
        if len(input.size()) == 5:
            is_multicrop = True
            bs, ncrops, c, h, w = input.size()
            # Fuse batch size and ncrops: [bs*ncrops, c, h, w]
            input = input.view(-1, c, h, w)

        # Measure data loading time
        data_time.update(time.time() - end)

        # Moving data to GPU
        # BUGFIX: `.cuda(async=True)` is a syntax error since Python 3.7
        # (`async` became a keyword); `non_blocking` is the supported spelling.
        if not no_cuda:
            input = input.cuda(non_blocking=True)
            target = target.cuda(non_blocking=True)

        # No gradients are needed during evaluation (replaces the removed
        # `Variable(..., volatile=True)` API).
        with torch.no_grad():
            # Compute output
            output = model(input)

            if is_multicrop:
                # Average the predictions over the crops of each sample
                output = output.view(bs, ncrops, -1).mean(1)

            # Compute and record the loss (loss.data[0] is the deprecated
            # spelling of loss.item())
            loss = criterion(output, target)
        losses.update(loss.item(), input.size(0))

        # Compute and record the accuracy
        acc1 = accuracy(output.data, target, topk=(1, ))[0]
        top1.update(acc1[0], input.size(0))

        # Collect the predictions and ground truth for the confusion matrix
        preds.extend(np.argmax(item) for item in output.data.cpu().numpy())
        targets.extend(target.cpu().numpy())

        # Add loss and accuracy to Tensorboard
        if multi_run is None:
            writer.add_scalar(logging_label + '/mb_loss', loss.item(),
                              epoch * len(data_loader) + batch_idx)
            writer.add_scalar(logging_label + '/mb_accuracy',
                              acc1.cpu().numpy(),
                              epoch * len(data_loader) + batch_idx)
        else:
            writer.add_scalar(logging_label + '/mb_loss_{}'.format(multi_run),
                              loss.item(),
                              epoch * len(data_loader) + batch_idx)
            writer.add_scalar(
                logging_label + '/mb_accuracy_{}'.format(multi_run),
                acc1.cpu().numpy(),
                epoch * len(data_loader) + batch_idx)

        # Measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if batch_idx % log_interval == 0:
            pbar.set_description(logging_label +
                                 ' epoch [{0}][{1}/{2}]\t'.format(
                                     epoch, batch_idx, len(data_loader)))

            pbar.set_postfix(
                Time='{batch_time.avg:.3f}\t'.format(batch_time=batch_time),
                Loss='{loss.avg:.4f}\t'.format(loss=losses),
                Acc1='{top1.avg:.3f}\t'.format(top1=top1),
                Data='{data_time.avg:.3f}\t'.format(data_time=data_time))

    # Make a confusion matrix
    try:
        cm = confusion_matrix(y_true=targets, y_pred=preds)
        confusion_matrix_heatmap = make_heatmap(cm,
                                                data_loader.dataset.classes)
    except ValueError:
        logging.warning('Confusion Matrix did not work as expected')
        confusion_matrix_heatmap = np.zeros((10, 10, 3))

    # Logging the epoch-wise accuracy and confusion matrix
    if multi_run is None:
        writer.add_scalar(logging_label + '/accuracy', top1.avg, epoch)
        save_image_and_log_to_tensorboard(writer,
                                          tag=logging_label +
                                          '/confusion_matrix',
                                          image=confusion_matrix_heatmap,
                                          global_step=epoch)
    else:
        writer.add_scalar(logging_label + '/accuracy_{}'.format(multi_run),
                          top1.avg, epoch)
        save_image_and_log_to_tensorboard(
            writer,
            tag=logging_label + '/confusion_matrix_{}'.format(multi_run),
            image=confusion_matrix_heatmap,
            global_step=epoch)

    logging.info(
        _prettyprint_logging_label(logging_label) + ' epoch[{}]: '
        'Acc@1={top1.avg:.3f}\t'
        'Loss={loss.avg:.4f}\t'
        'Batch time={batch_time.avg:.3f} ({data_time.avg:.3f} to load data)'.
        format(epoch,
               batch_time=batch_time,
               data_time=data_time,
               loss=losses,
               top1=top1))

    # Generate a classification report for each epoch
    _log_classification_report(data_loader, epoch, preds, targets, writer)

    return top1.avg
def train(model, train_loader, criterion, scheduler, optimizer, epoch, logger):
    """Train the GCN-augmented text detector for one epoch.

    The total loss is the sum of the five component losses plus the GCN
    loss. Batches whose backward pass fails are skipped (best-effort) so a
    single bad batch does not abort the epoch.
    """
    global train_step

    losses = AverageMeter()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    end = time.time()
    model.train()
    # NOTE: the LR scheduler is deliberately NOT stepped here
    # (was a commented-out `scheduler.step()` in the original).

    print('Epoch: {} : LR = {}'.format(epoch, scheduler.get_lr()))

    for i, (img, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map,
            gt_roi) in enumerate(train_loader):
        data_time.update(time.time() - end)

        train_step += 1

        img, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map \
            = to_device(img, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map)

        output, gcn_data = model(img, gt_roi, to_device)

        tr_loss, tcl_loss, sin_loss, cos_loss, radii_loss, gcn_loss \
            = criterion(output, gcn_data, train_mask, tr_mask, tcl_mask, radius_map, sin_map, cos_map)
        loss = tr_loss + tcl_loss + sin_loss + cos_loss + radii_loss + gcn_loss

        # backward; best-effort: a failing backward pass skips the batch
        try:
            optimizer.zero_grad()
            loss.backward()
        except Exception:  # was a bare except; don't mask KeyboardInterrupt/SystemExit
            print("loss gg")
            continue

        optimizer.step()

        losses.update(loss.item())
        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()
        gc.collect()

        if cfg.viz and i % cfg.viz_freq == 0:
            visualize_network_output(output,
                                     tr_mask,
                                     tcl_mask[:, :, :, 0],
                                     mode='train')

        if i % cfg.display_freq == 0:
            print(
                '({:d} / {:d})  Loss: {:.4f}  tr_loss: {:.4f}  tcl_loss: {:.4f}  '
                'sin_loss: {:.4f}  cos_loss: {:.4f}  radii_loss: {:.4f}  gcn_loss: {:.4f}'
                .format(i, len(train_loader), loss.item(), tr_loss.item(),
                        tcl_loss.item(), sin_loss.item(), cos_loss.item(),
                        radii_loss.item(), gcn_loss.item()))

        if i % cfg.log_freq == 0:
            logger.write_scalars(
                {
                    'loss': loss.item(),
                    'tr_loss': tr_loss.item(),
                    'tcl_loss': tcl_loss.item(),
                    'sin_loss': sin_loss.item(),
                    'cos_loss': cos_loss.item(),
                    'radii_loss': radii_loss.item(),
                    # BUGFIX: key was 'gcn_loss:' (stray colon) in the original,
                    # inconsistent with every other scalar key
                    'gcn_loss': gcn_loss.item()
                },
                tag='train',
                n_iter=train_step)

    if epoch % cfg.save_freq == 0:
        save_model(model, epoch, scheduler.get_lr(), optimizer)

    print('Training Loss: {}'.format(losses.avg))
Exemple #25
0
def train(train_loader, model, criterion, optimizer, epoch, tflogger):
    """Train a PSENet-style text/kernel segmentation network for one epoch.

    Channel 0 of the model output is the full-text score map; the remaining
    channels are the shrunk kernels. The loss is 0.7 * text + 0.3 * mean
    kernel loss, with OHEM-selected pixels for the text branch.

    Returns
    -------
    tuple of float
        (avg loss, text mean acc, kernel mean acc, text mean IoU, kernel mean IoU)
    """
    model.train()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    running_metric_text = runningScore(2)
    running_metric_kernel = runningScore(2)
    global globalcounter

    end = time.time()
    for batch_idx, (imgs, gt_texts, gt_kernels,
                    training_masks) in enumerate(train_loader):
        data_time.update(time.time() - end)

        imgs = Variable(imgs.cuda())
        gt_texts = Variable(gt_texts.cuda())
        gt_kernels = Variable(gt_kernels.cuda())
        training_masks = Variable(training_masks.cuda())

        outputs = model(imgs)
        texts = outputs[:, 0, :, :]
        kernels = outputs[:, 1:, :, :]

        # OHEM-selected pixels for the text loss
        selected_masks = ohem_batch(texts, gt_texts, training_masks)
        selected_masks = Variable(selected_masks.cuda())

        loss_text = criterion(texts, gt_texts, selected_masks)

        # Kernel losses are computed only where the text prediction is
        # confident AND the training mask allows it
        loss_kernels = []
        mask0 = torch.sigmoid(texts).data.cpu().numpy()
        mask1 = training_masks.data.cpu().numpy()
        selected_masks = ((mask0 > 0.5) & (mask1 > 0.5)).astype('float32')
        selected_masks = torch.from_numpy(selected_masks).float()
        selected_masks = Variable(selected_masks.cuda())
        # Generalized from the hard-coded `range(6)`: iterate over however many
        # kernel channels the model emits (assumes gt_kernels has the same
        # number of kernel channels — TODO confirm against the dataset)
        for k in range(kernels.size(1)):
            kernel_k = kernels[:, k, :, :]
            gt_kernel_k = gt_kernels[:, k, :, :]
            loss_kernels.append(criterion(kernel_k, gt_kernel_k, selected_masks))
        loss_kernel = sum(loss_kernels) / len(loss_kernels)

        # Weighted combination of text and kernel losses
        loss = 0.7 * loss_text + 0.3 * loss_kernel
        losses.update(loss.item(), imgs.size(0))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        score_text = cal_text_score(texts, gt_texts, training_masks,
                                    running_metric_text)
        score_kernel = cal_kernel_score(kernels, gt_kernels, gt_texts,
                                        training_masks, running_metric_kernel)

        batch_time.update(time.time() - end)
        end = time.time()

        if batch_idx % 20 == 0:
            output_log = '({batch}/{size}) Batch: {bt:.3f}s | TOTAL: {total:.0f}min | ETA: {eta:.0f}min | Loss: {loss:.4f} | Acc_t: {acc: .4f} | IOU_t: {iou_t: .4f} | IOU_k: {iou_k: .4f}'.format(
                batch=batch_idx + 1,
                size=len(train_loader),
                bt=batch_time.avg,
                total=batch_time.avg * batch_idx / 60.0,
                eta=batch_time.avg * (len(train_loader) - batch_idx) / 60.0,
                loss=losses.avg,
                acc=score_text['Mean Acc'],
                iou_t=score_text['Mean IoU'],
                iou_k=score_kernel['Mean IoU'])
            print(output_log)
            sys.stdout.flush()

        if batch_idx % 100 == 0:
            # Log parameter and gradient histograms for every named parameter
            for tag, value in model.named_parameters():
                tag = tag.replace('.', '/')
                tflogger.histo_summary(tag,
                                       value.data.detach().cpu().numpy(),
                                       globalcounter)
                tflogger.histo_summary(tag + '/grad',
                                       value.grad.data.detach().cpu().numpy(),
                                       globalcounter)
            globalcounter += 1

    return (float(losses.avg), float(score_text['Mean Acc']),
            float(score_kernel['Mean Acc']), float(score_text['Mean IoU']),
            float(score_kernel['Mean IoU']))
Exemple #26
0
def _evaluate(data_loader,
              model,
              criterion,
              observer,
              observer_criterion,
              writer,
              epoch,
              logging_label,
              no_cuda=False,
              log_interval=10,
              **kwargs):
    """
    The evaluation routine: runs the model and the observer network over the
    whole evaluation set, records losses/accuracies, logs mini-batch and
    epoch-wise scalars plus a confusion-matrix heatmap to tensorboard.

    Parameters
    ----------
    :param data_loader : torch.utils.data.DataLoader
        The dataloader of the evaluation set

    :param model : torch.nn.Module
        The network model being used

    :param criterion : torch.nn.loss
        The loss function used to compute the loss of the model

    :param observer : torch.nn.Module
        Auxiliary network evaluated on the features taken from the model's
        second-to-last layer

    :param observer_criterion : torch.nn.loss
        The loss function used to compute the loss of the observer

    :param writer : tensorboardX.writer.SummaryWriter
        The tensorboard writer object. Used to log values on file for the
        tensorboard visualization.

    :param epoch : int
        Number of the epoch (for logging purposes)

    :param logging_label : string
        Label for logging purposes. Typically 'test' or 'valid'. Its prepended
        to the logging output path and messages.

    :param no_cuda : boolean
        Specifies whether the GPU should be used or not. A value of 'True'
        means the CPU will be used.

    :param log_interval : int
        Interval limiting the logging of mini-batches. Default value of 10.

    :return:
        float: epoch-wise top-1 accuracy of the main model
    """
    multi_run = kwargs['run'] if 'run' in kwargs else None
    # One suffix covers both tensorboard tag variants: '' for a single run,
    # '_<run>' when part of a multi-run experiment.
    run_suffix = '' if multi_run is None else '_{}'.format(multi_run)

    # Instantiate the counters
    batch_time = AverageMeter()
    losses = AverageMeter()
    observer_loss_meter = AverageMeter()
    top1 = AverageMeter()
    observer_acc_meter = AverageMeter()
    data_time = AverageMeter()

    # Switch to evaluate mode (turn off dropout & such )
    model.eval()

    # Iterate over whole evaluation set
    end = time.time()

    # Empty lists to store the predictions and target values
    preds = []
    targets = []

    pbar = tqdm(enumerate(data_loader),
                total=len(data_loader),
                unit='batch',
                ncols=150,
                leave=False)
    # No gradients are needed during evaluation: torch.no_grad() replaces the
    # removed `Variable(..., volatile=True)` API.
    with torch.no_grad():
        for batch_idx, (inp, target) in pbar:

            # Measure data loading time
            data_time.update(time.time() - end)

            # Moving data to GPU. `non_blocking` replaces the old `async`
            # keyword argument, which is a SyntaxError since Python 3.7.
            if not no_cuda:
                inp = inp.cuda(non_blocking=True)
                target = target.cuda(non_blocking=True)

            # Compute output
            output = model(inp)

            # Features from the second-to-last layer, detached so the observer
            # cannot influence the main model.
            input_features = model.module.features.detach()

            # Use observer on the features
            observer_acc, observer_loss = evaluate_one_mini_batch(
                observer, observer_criterion, input_features, target,
                observer_loss_meter, observer_acc_meter)

            # Compute and record the loss (`.item()` replaces the removed
            # 0-dim indexing `loss.data[0]`)
            loss = criterion(output, target)
            losses.update(loss.item(), inp.size(0))

            # Compute and record the accuracy
            acc1 = accuracy(output.data, target, topk=(1, ))[0]
            top1.update(acc1[0], inp.size(0))

            # Get the predictions
            preds.extend(np.argmax(output.data.cpu().numpy(), axis=1))
            targets.extend(target.cpu().numpy())

            # Add loss and accuracy to Tensorboard
            step = epoch * len(data_loader) + batch_idx
            writer.add_scalar(logging_label + '/mb_loss' + run_suffix,
                              loss.item(), step)
            writer.add_scalar(logging_label + '/mb_accuracy' + run_suffix,
                              acc1.cpu().numpy(), step)
            writer.add_scalar(logging_label + '/obs_mb_loss' + run_suffix,
                              observer_loss.item(), step)
            writer.add_scalar(logging_label + '/obs_mb_accuracy' + run_suffix,
                              observer_acc.cpu().numpy(), step)

            # Measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if batch_idx % log_interval == 0:
                pbar.set_description(logging_label +
                                     ' epoch [{0}][{1}/{2}]\t'.format(
                                         epoch, batch_idx, len(data_loader)))

                pbar.set_postfix(
                    Time='{batch_time.avg:.3f}\t'.format(batch_time=batch_time),
                    Loss='{loss.avg:.4f}\t'.format(loss=losses),
                    Acc1='{top1.avg:.3f}\t'.format(top1=top1),
                    Data='{data_time.avg:.3f}\t'.format(data_time=data_time))

    # Make a confusion matrix
    try:
        cm = confusion_matrix(y_true=targets, y_pred=preds)
        confusion_matrix_heatmap = make_heatmap(cm,
                                                data_loader.dataset.classes)
    except ValueError:
        logging.warning('Confusion Matrix did not work as expected')
        # Fall back to a blank RGB image so tensorboard logging still works.
        confusion_matrix_heatmap = np.zeros((10, 10, 3))

    # Logging the epoch-wise accuracy and confusion matrix
    writer.add_scalar(logging_label + '/accuracy' + run_suffix, top1.avg,
                      epoch)
    writer.add_scalar(logging_label + '/obs_accuracy' + run_suffix,
                      observer_acc_meter.avg, epoch)
    save_image_and_log_to_tensorboard(
        writer,
        tag=logging_label + '/confusion_matrix' + run_suffix,
        image_tensor=confusion_matrix_heatmap,
        global_step=epoch)

    logging.info(
        _prettyprint_logging_label(logging_label) + ' epoch[{}]: '
        'Acc@1={top1.avg:.3f}\t'
        'Loss={loss.avg:.4f}\t'
        'Batch time={batch_time.avg:.3f} ({data_time.avg:.3f} to load data)'.
        format(epoch,
               batch_time=batch_time,
               data_time=data_time,
               loss=losses,
               top1=top1))

    # Generate a classification report for each epoch
    _log_classification_report(data_loader, epoch, preds, targets, writer)

    return top1.avg