Example #1
0
def predict(**cfg):
    """Predict and print the class of a single image.

    Reads the image path from ``cfg["predict"]["image_path"]``, applies the
    standard ImageNet evaluation transforms, runs the checkpointed model and
    prints the predicted class name from ``model.idx_to_class``.
    """

    # Eval-time preprocessing: resize, center-crop to 224, ImageNet-normalize.
    loader = transforms.Compose([
        transforms.Resize(size=256),
        transforms.CenterCrop(size=224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    # Load the image as a batched PyTorch tensor (1 x 3 x 224 x 224).
    image = loader(Image.open(
        cfg["predict"]["image_path"]).convert("RGB")).float().unsqueeze(0)

    # Load model from checkpoint.
    model = load_checkpoint(**cfg)

    # Move the input to GPU if one is available.
    train_gpu, _ = check_gpu()
    if train_gpu:
        image = image.cuda()

    # Get model prediction.
    output = model(image)
    _, pred = torch.max(output, 1)

    # BUGFIX: `pred` is a 1-element tensor; tensors hash by identity so using
    # it directly as a key into the int-keyed idx_to_class dict raises
    # KeyError. Convert to a plain Python int first.
    print("Prediction: " + str(model.idx_to_class[pred.item()]))
Example #2
0
def main(args):
    """Compute and cache FID activation statistics (mu, sigma) for a split.

    Picks the train or validation loader from ``args.train``, extracts 2048-d
    InceptionV3 pool-3 activations and saves their mean/covariance as an
    ``.npz`` under ``args.save``. Exits early if the file already exists.
    """
    device = check_gpu()
    # Fall back to a small batch size when running on CPU.
    bs = args.batch_size if str(device) != 'cpu' else 50
    dataset = ImageDataset(args.dataset, batch_sz=bs)

    flag = args.train.lower()
    if flag == 'true':
        t_or_v, loader = 'train', dataset.train_loader
    elif flag == 'false':
        t_or_v, loader = 'valid', dataset.valid_loader
    else:
        raise Exception('args.train not understood')

    save_path = Path(args.save)
    save_path.mkdir(parents=True, exist_ok=True)
    file_name = f'{args.dataset}_{t_or_v}_fid_stats'
    out_file = save_path / (file_name + '.npz')
    if out_file.exists():
        print(f"{out_file} exists. Exiting !!!")
        return

    # 2048-dim pool-3 activations are the standard FID feature layer.
    block_idx = InceptionV3.BLOCK_INDEX_BY_DIM[2048]
    model = InceptionV3([block_idx]).to(device)
    mu, sigma = calc_activation_stats(loader, model)
    mu, sigma = mu.cpu().numpy(), sigma.cpu().numpy()

    np.savez(save_path / file_name, mu=mu, sigma=sigma)
Example #3
0
def WeightedMultiLabelSigmoidLoss(model_output, target):
    """
    Class-balanced multi-label sigmoid cross-entropy (HED-style edge loss).

    model_output: BS X NUM_CLASSES X H X W
    target: BS X H X W X NUM_CLASSES
    """
    height = target.size()[1]
    width = target.size()[2]
    num_pixels = float(height * width)

    # Per-sample count of positive (edge) pixels, summed over H, W and class
    # dims; moved to the GPU via the project helper.
    weight_sum = utils.check_gpu(
        0,
        target.sum(dim=1).sum(dim=1).sum(dim=1).float().data)  # BS
    # Edge / non-edge pixel fractions; positives get the non-edge weight and
    # vice versa so the rarer class is up-weighted.
    edge_weight = utils.check_gpu(0, weight_sum.data / num_pixels)
    non_edge_weight = utils.check_gpu(
        0, (height * width - weight_sum.data) / num_pixels)

    prob_one = sigmoid(model_output)
    prob_zero = 1 - prob_one

    # Rearrange target to BS X NUM_CLASSES X H X W to match model_output.
    target = target.transpose(1, 3).transpose(2, 3).float()

    def _expand(w):
        # Broadcast a per-sample weight (BS) over class and spatial dims.
        return w.unsqueeze(1).unsqueeze(2).unsqueeze(3)

    # Clamp probabilities away from zero before log for numerical stability.
    loss = (-_expand(non_edge_weight) * target *
            torch.log(prob_one.clamp(min=1e-10)) -
            _expand(edge_weight) * (1 - target) *
            torch.log(prob_zero.clamp(min=1e-10)))

    return loss.mean(dim=0).sum()
Example #4
0
def train(args, train_loader, model, optimizer, epoch, curr_lr, win_feats5,
          win_fusion, viz, global_step, accumulation_steps):
    """Train for one epoch with gradient accumulation.

    Gradients are accumulated over `accumulation_steps` mini-batches before
    each optimizer step, so the effective batch size is the loader batch size
    times `accumulation_steps`. Feats5/fusion losses are plotted to the given
    visdom windows. Returns the updated `global_step`.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    feats5_losses = AverageMeter()
    fusion_losses = AverageMeter()
    total_losses = AverageMeter()

    # Train mode; clear the gradient buffer once up front since steps are
    # deferred until `accumulation_steps` batches have been accumulated.
    model.train()
    optimizer.zero_grad()

    end = time.time()
    for i, (img, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        # Input for Image CNN.
        img_var = utils.check_gpu(0, img)  # BS X 3 X H X W
        target_var = utils.check_gpu(0, target)  # BS X H X W X NUM_CLASSES

        # Effective batch size represented by one optimizer step.
        bs = img.size()[0] * accumulation_steps

        score_feats5, fused_feats = model(
            img_var)  # BS X NUM_CLASSES X 472 X 472

        feats5_loss = WeightedMultiLabelSigmoidLoss(score_feats5, target_var)
        fused_feats_loss = WeightedMultiLabelSigmoidLoss(
            fused_feats, target_var)
        loss = feats5_loss + fused_feats_loss

        # Accumulate gradients in place; optimizer.step() happens below.
        loss.backward()

        # clear memory
        del img_var
        del target_var
        del score_feats5
        torch.cuda.empty_cache()

        # increase batch size by factor of accumulation steps (Gradient accumulation) for training with limited memory
        if (i + 1) % accumulation_steps == 0:
            feats5_losses.update(feats5_loss.data, bs)
            fusion_losses.update(fused_feats_loss.data, bs)
            total_losses.update(loss.data, bs)

            # Only plot the fused feats loss.
            trn_feats5_loss = feats5_loss.clone().cpu().data.numpy()
            trn_fusion_loss = fused_feats_loss.clone().cpu().data.numpy()
            viz.line(win=win_feats5,
                     name='train_feats5',
                     update='append',
                     X=np.array([global_step]),
                     Y=np.array([trn_feats5_loss]))
            viz.line(win=win_fusion,
                     name='train_fusion',
                     update='append',
                     X=np.array([global_step]),
                     Y=np.array([trn_fusion_loss]))

            optimizer.step()
            optimizer.zero_grad()
            global_step += 1

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if ((i + 1) % args.print_freq == 0):
                print("\n\n")
                print(
                    'Epoch: [{0}][{1}/{2}]\t'
                    'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                    'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                    'Total Loss {total_loss.val:.11f} ({total_loss.avg:.11f})\n'
                    'lr {learning_rate:.10f}\t'.format(
                        epoch,
                        int((i + 1) / accumulation_steps),
                        int(len(train_loader) / accumulation_steps),
                        batch_time=batch_time,
                        data_time=data_time,
                        total_loss=total_losses,
                        learning_rate=curr_lr))

    # Release loss tensors/meters and cached GPU memory before returning.
    del feats5_loss
    del fused_feats_loss
    del feats5_losses
    del fusion_losses
    del total_losses
    torch.cuda.empty_cache()
    return global_step
Example #5
0
def validate(args, val_loader, model, epoch, win_feats5, win_fusion, viz,
             global_step):
    """Run one validation epoch and return the average fusion loss.

    Computes the feats5/fusion sigmoid losses over `val_loader` without
    tracking gradients and prints progress every `args.print_freq` batches.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    feats5_losses = AverageMeter()
    fusion_losses = AverageMeter()
    total_losses = AverageMeter()

    # switch to eval mode
    model.eval()

    end = time.time()
    # BUGFIX: the original called `torch.no_grad()` as a bare statement, which
    # builds and discards the context manager without ever disabling gradient
    # tracking. It must wrap the forward passes as a `with` block.
    with torch.no_grad():
        for i, (img, target) in enumerate(val_loader):
            # measure data loading time
            data_time.update(time.time() - end)

            # Input for Image CNN.
            img_var = utils.check_gpu(0, img)  # BS X 3 X H X W
            target_var = utils.check_gpu(0, target)  # BS X H X W X NUM_CLASSES

            bs = img.size()[0]

            score_feats5, fused_feats = model(
                img_var)  # BS X NUM_CLASSES X 472 X 472

            feats5_loss = WeightedMultiLabelSigmoidLoss(score_feats5,
                                                        target_var)
            fused_feats_loss = WeightedMultiLabelSigmoidLoss(
                fused_feats, target_var)
            loss = feats5_loss + fused_feats_loss

            feats5_losses.update(feats5_loss.data, bs)
            fusion_losses.update(fused_feats_loss.data, bs)
            total_losses.update(loss.data, bs)

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            # clear memory
            del img_var
            del target_var
            del score_feats5
            del fused_feats_loss
            del feats5_loss
            torch.cuda.empty_cache()

            if (i % args.print_freq == 0):
                print("\n\n")
                print('Epoch: [{0}][{1}/{2}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                      'Total Loss {total_loss.val:.4f} ({total_loss.avg:.4f})\n'.
                      format(epoch,
                             i,
                             len(val_loader),
                             batch_time=batch_time,
                             data_time=data_time,
                             total_loss=total_losses))

    #viz.line(win=win_feats5, name='val_feats5', update='append', X=np.array([global_step]), Y=np.array([feats5_losses.avg]))
    #viz.line(win=win_fusion, name='val_fusion', update='append', X=np.array([global_step]), Y=np.array([fusion_losses.avg]))

    return fusion_losses.avg
Example #6
0
def evaluate(**cfg):
    """Evaluate the checkpointed model on one or more data splits.

    For each split this computes NLL loss, overall and per-class accuracy,
    precision/recall/F1, saves a confusion-matrix plot per split, and writes
    all metrics to ``results.csv`` under the run's evaluate directory.
    """

    # Load data, checkpoint and gpu status
    data_d, dataloaders = create_dataloaders(
        cfg['datadir'], batch_size=cfg['evaluate']['batch_size'])
    model = load_checkpoint(**cfg)
    train_on_gpu, multi_gpu = check_gpu()

    # Set default split as test
    if cfg["evaluate"]["data_split"] is not None:
        data_splits = list(cfg["evaluate"]["data_split"])
    else:
        data_splits = ["test"]

    # Create evaluate folder
    save_path = "./output/evaluate/" + cfg["run_name"] + "/"
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    # Initialize dict to save final metrics
    results_dict = {}

    # Iterate in all data_splits
    for data_split in data_splits:

        evaluation_loader = dataloaders[data_split]
        evaluation_loss = 0.0
        evaluation_acc = 0.0
        pred_list = []
        target_list = []

        class_correct = list(0. for i in range(2))
        class_total = list(0. for i in range(2))
        classes = [0, 1]
        criterion = nn.NLLLoss()
        model.eval()

        # iterate over dataset
        for data, target in evaluation_loader:
            # NOTE(review): partial batches are skipped; 8 looks like the
            # expected batch size — confirm against cfg['evaluate']['batch_size'].
            if len(target) != 8:
                continue

            # move tensors to GPU if CUDA is available
            if train_on_gpu:
                data, target = data.cuda(), target.cuda()
            # forward pass: compute predicted outputs by passing inputs to the model
            output = model(data)
            # calculate the batch loss
            loss = criterion(output, target)
            # update evaluation loss
            evaluation_loss += loss.item() * data.size(0)
            # convert output probabilities to predicted class
            _, pred = torch.max(output, 1)
            # Compile all pred and target in list
            pred_list.extend(pred.squeeze().tolist())
            target_list.extend(target.squeeze().tolist())
            # compare predictions to true label
            correct_tensor = pred.eq(target.data.view_as(pred))
            accuracy = torch.mean(correct_tensor.type(torch.FloatTensor))
            evaluation_acc += accuracy.item() * data.size(0)

            correct = np.squeeze(
                correct_tensor.numpy()) if not train_on_gpu else np.squeeze(
                    correct_tensor.cpu().numpy())

            # BUGFIX: the original looped `for i in range(batch_size)`,
            # shadowing the outer counter `i` and raising IndexError whenever
            # the configured batch size exceeds the actual batch length.
            # Iterate the real batch length with a fresh variable instead.
            for j in range(len(target)):
                label = target.data[j]
                class_correct[label] += correct[j].item()
                class_total[label] += 1

        # average evaluation loss and accuracy
        evaluation_loss = evaluation_loss / len(evaluation_loader.dataset)
        print(
            str(data_split).capitalize() +
            ' Loss: {:.6f}\n'.format(evaluation_loss))
        evaluation_acc = evaluation_acc / len(evaluation_loader.dataset)

        # Per-class accuracy (binary classification: classes 0 and 1).
        for i in range(2):
            if class_total[i] > 0:
                print(
                    str(data_split).capitalize() +
                    ' Accuracy of %5s: %2d%% (%2d/%2d)' %
                    (classes[i], 100 * class_correct[i] / class_total[i],
                     np.sum(class_correct[i]), np.sum(class_total[i])))
            else:
                print(
                    str(data_split).capitalize() +
                    ' Accuracy of %5s: N/A (no training examples)' %
                    (classes[i]))

        print('\n' + str(data_split).capitalize() +
              ' Accuracy (Overall): %2d%% (%2d/%2d)' %
              (100. * np.sum(class_correct) / np.sum(class_total),
               np.sum(class_correct), np.sum(class_total)))

        # Calculate all metrics
        clf_rep = precision_recall_fscore_support(target_list, pred_list)
        precision = clf_rep[0].round(2)
        recall = clf_rep[1].round(2)
        f1_score = clf_rep[2].round(2)

        # Save confusion matrix
        cm = confusion_matrix(target_list, pred_list)
        cm_disp = ConfusionMatrixDisplay(confusion_matrix=cm)
        cm_disp.plot()
        plt.title(
            str(data_split).capitalize() + " Set: " +
            str(len(evaluation_loader.dataset)) + " images")
        plt.savefig(save_path + "confusion_matrix_" + str(data_split) + ".png")

        # Save all metrics in dict
        results_dict[data_split] = [
            evaluation_loss, evaluation_acc, precision, recall, f1_score
        ]

    results_df = pd.DataFrame(
        results_dict,
        index=['Loss', 'Accuracy', 'Precision', 'Recall', 'F1_Score'])
    results_df.to_csv(save_path + "results.csv")
Example #7
0
def train(args, train_loader, model, optimizer, epoch, curr_lr, win_feats5,
          win_fusion, viz, global_step):
    """Train for one epoch (legacy, no gradient accumulation).

    Plots per-batch feats5/fusion losses to the given visdom windows and
    returns the updated `global_step`.

    NOTE(review): `.data[0]` indexing and `.data.numpy()[0]` are pre-0.4
    PyTorch idioms for reading a scalar loss; on modern PyTorch a 0-dim loss
    requires `.item()` instead — confirm the target torch version.
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    feats5_losses = AverageMeter()
    fusion_losses = AverageMeter()
    total_losses = AverageMeter()

    # switch to eval mode to make BN unchanged.
    model.eval()

    end = time.time()
    for i, (img, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        # Input for Image CNN.
        img_var = utils.check_gpu(0, img)  # BS X 3 X H X W
        target_var = utils.check_gpu(0, target)  # BS X H X W X NUM_CLASSES

        # Actual batch size (may be smaller on the last batch).
        bs = img.size()[0]

        score_feats5, fused_feats = model(
            img_var)  # BS X NUM_CLASSES X 352 X 352

        feats5_loss = WeightedMultiLabelSigmoidLoss(score_feats5, target_var)
        fused_feats_loss = WeightedMultiLabelSigmoidLoss(
            fused_feats, target_var)
        loss = feats5_loss + fused_feats_loss

        feats5_losses.update(feats5_loss.data[0], bs)
        fusion_losses.update(fused_feats_loss.data[0], bs)
        total_losses.update(loss.data[0], bs)

        # Only plot the fused feats loss.
        trn_feats5_loss = feats5_loss.clone().cpu().data.numpy()[0]
        trn_fusion_loss = fused_feats_loss.clone().cpu().data.numpy()[0]
        viz.line(win=win_feats5,
                 name='train_feats5',
                 update='append',
                 X=np.array([global_step]),
                 Y=np.array([trn_feats5_loss]))
        viz.line(win=win_fusion,
                 name='train_fusion',
                 update='append',
                 X=np.array([global_step]),
                 Y=np.array([trn_fusion_loss]))

        # Standard step: reset gradients, backprop, update parameters.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if (i % args.print_freq == 0):
            print("\n\n")
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Total Loss {total_loss.val:.11f} ({total_loss.avg:.11f})\n'
                  'lr {learning_rate:.10f}\t'.format(epoch,
                                                     i,
                                                     len(train_loader),
                                                     batch_time=batch_time,
                                                     data_time=data_time,
                                                     total_loss=total_losses,
                                                     learning_rate=curr_lr))

        global_step += 1

    return global_step
def train(**cfg):
    """
    Train a PyTorch model with validation-based early stopping.

     Params
     --------
         cfg: Config dict with desired params (run_name, datadir, model,
              train.batch_size, train.n_epochs)

     Returns
     --------
         None, saves checkpoint and results in output directory
     """

    # Define checkpoint path
    save_path = "./output/train/" + cfg["run_name"] + "/"
    checkpoint_path = save_path + "checkpoint.pth"

    if not os.path.exists(save_path):
        os.makedirs(save_path)

    # Load dataset and pre-trained model
    data, dataloaders = create_dataloaders(
        cfg['datadir'], batch_size=cfg["train"]['batch_size'])
    print("Found {} train, {} val, {} test images".format(
        len(data['train']), len(data['val']), len(data['test'])))

    model = get_pretrained_model(model_name=cfg["model"])
    print("Loaded model: {} \n".format(cfg["model"]))

    # Move to gpu and parallelize
    train_on_gpu, multi_gpu = check_gpu()
    if train_on_gpu:
        model = model.to('cuda')
    if multi_gpu:
        model = nn.DataParallel(model)

    # Keep both directions of the class/index mapping on the model so
    # predict()/evaluate() can decode outputs later.
    model.class_to_idx = data['train'].class_to_idx
    model.idx_to_class = {
        idx: class_
        for class_, idx in model.class_to_idx.items()
    }

    # Set criterion and optimizer
    criterion = nn.NLLLoss()
    optimizer = optim.Adam(model.parameters())
    model.optimizer = optimizer

    # Early stopping initialization
    epochs_no_improve = 0
    # BUGFIX: np.Inf was removed in NumPy 2.0; use np.inf.
    valid_loss_min = np.inf
    max_epochs_stop = 5
    print_every = 2
    history = []
    # BUGFIX: best_epoch was unbound if the epoch loop never ran; initialize.
    best_epoch = 0
    overall_start = timer()

    # Number of epochs already trained (if using loaded in model weights)
    # BUGFIX: the original bare `except:` swallowed every exception
    # (including KeyboardInterrupt); only a missing attribute is expected.
    try:
        print(f'Model has been trained for: {model.epochs} epochs.\n')
    except AttributeError:
        model.epochs = 0
        print(f'Starting Training from Scratch.')

    # Main loop
    for epoch in tqdm(range(cfg["train"]["n_epochs"])):
        # keep track of training and validation loss each epoch
        train_loss = 0.0
        valid_loss = 0.0
        train_acc = 0
        valid_acc = 0

        # Set to training
        model.train()
        start = timer()
        # Training loop for batches
        for ii, (data, target) in enumerate(dataloaders["train"]):
            # Tensors to gpu
            if train_on_gpu:
                data, target = data.cuda(), target.cuda()
            # Clear gradients
            optimizer.zero_grad()
            # Predicted outputs are log probabilities
            output = model(data)
            # Loss and backpropagation of gradients
            loss = criterion(output, target)
            loss.backward()
            # Update the parameters
            optimizer.step()
            # Track train loss by multiplying average loss by number of examples in batch
            train_loss += loss.item() * data.size(0)
            # Calculate accuracy by finding max log probability
            _, pred = torch.max(output, dim=1)
            correct_tensor = pred.eq(target.data.view_as(pred))
            # Need to convert correct tensor from int to float to average
            accuracy = torch.mean(correct_tensor.type(torch.FloatTensor))
            # Multiply average accuracy times the number of examples in batch
            train_acc += accuracy.item() * data.size(0)
            # Track training progress
            print(
                f'Epoch: {epoch}\t{100 * (ii + 1) / len(dataloaders["train"]): .2f}% complete. {timer() - start:.2f} '
                f'seconds elapsed in epoch.',
                end='\r')

        # After training loops ends, start validation
        model.epochs += 1
        # Don't need to keep track of gradients
        with torch.no_grad():
            # Set to evaluation mode
            model.eval()
            # Validation loop
            for data, target in dataloaders["val"]:
                # Tensors to gpu
                if train_on_gpu:
                    data, target = data.cuda(), target.cuda()
                # Forward pass
                output = model(data)
                # Validation loss
                loss = criterion(output, target)
                # Multiply average loss times the number of examples in batch
                valid_loss += loss.item() * data.size(0)
                # Calculate validation accuracy
                _, pred = torch.max(output, dim=1)
                correct_tensor = pred.eq(target.data.view_as(pred))
                accuracy = torch.mean(correct_tensor.type(torch.FloatTensor))
                # Multiply average accuracy times the number of examples
                valid_acc += accuracy.item() * data.size(0)
            # Calculate average losses
            train_loss = train_loss / len(dataloaders["train"].dataset)
            valid_loss = valid_loss / len(dataloaders["val"].dataset)
            # Calculate average accuracy
            train_acc = train_acc / len(dataloaders["train"].dataset)
            valid_acc = valid_acc / len(dataloaders["val"].dataset)
            history.append([train_loss, valid_loss, train_acc, valid_acc])
            # Print training and validation results
            if (epoch + 1) % print_every == 0:
                print(
                    f'\nEpoch: {epoch} \tTraining Loss: {train_loss:.4f} \tValidation Loss: {valid_loss:.4f}'
                )
                print(
                    f'\t\tTraining Accuracy: {100 * train_acc:.2f}%\t Validation Accuracy: {100 * valid_acc:.2f}%'
                )
            # Save the model if validation loss decreases
            if valid_loss < valid_loss_min:
                # Save model
                save_checkpoint(model, checkpoint_path)
                save_and_plot_results(history, save_path)

                # Track improvement
                epochs_no_improve = 0
                valid_loss_min = valid_loss
                best_epoch = epoch
            # Otherwise increment count of epochs with no improvement
            else:
                epochs_no_improve += 1
                # Trigger early stopping
                if epochs_no_improve >= max_epochs_stop:
                    print(
                        f'\nEarly Stopping! Total epochs: {epoch}. Best epoch: {best_epoch} with loss:'
                        f' {valid_loss_min:.2f} and acc: {100 * valid_acc:.2f}%'
                    )
                    total_time = timer() - overall_start
                    print(
                        f'{total_time:.2f} total seconds elapsed. {total_time / (epoch + 1):.2f} seconds per epoch.'
                    )

                    # Load the best state dict
                    #model = load_checkpoint(**cfg)
                    save_and_plot_results(history, save_path)
                    break

    # Record overall time and print out stats
    total_time = timer() - overall_start
    print(
        f'\nBest epoch: {best_epoch} with loss: {valid_loss_min:.2f} and acc: {100 * valid_acc:.2f}%'
    )
    print(
        f'{total_time:.2f} total seconds elapsed. {total_time / (epoch + 1):.2f} seconds per epoch.'
    )

    save_and_plot_results(history, save_path)
Example #9
0
from models.transformer_generator import TGenerator
from models.ViT_discriminator import Discriminator
from utils.utils import check_gpu, display_images
from utils.checkpoint import Checkpoint
from utils.loss import wgangp_eps_loss
from utils.datasets import ImageDataset
from metrics.torch_is_fid_score import is_fid_from_generator

# Base path for persistent storage (mounted Google Drive).
gdrive = "/mnt/c/Google Drive/"

# Folder for the required checkpoint instance.
# If it does not exist, the Checkpoint class will create one.
ckp_folder = gdrive + "temporary_checkpoint"

device = check_gpu()
print(f"Using device: {device}")
"""# Training"""

# Hyperparameters: generator/discriminator batch sizes, latent dimension,
# Adam settings (lr, beta1, beta2) and epoch count.
gen_batch_sz = 64
dis_batch_sz = 32
latent_dims = 1024
lr, beta1, beta2 = 1e-4, 0, 0.999
num_epochs = 20

dataset = ImageDataset("cifar_10", batch_sz=dis_batch_sz, num_workers=2)
# display_images(dataset.train_loader)

# Fixed latent batch reused across epochs for comparable sample grids.
# NOTE(review): `torch` is used here but not imported in this visible chunk —
# confirm it is imported earlier in the file.
Gen = TGenerator(latent_dims=latent_dims).to(device)
fixed_z = torch.randn(gen_batch_sz, latent_dims, device=device)
# summary(Gen,(latent_dims,))
Example #10
0
        ToTorchFormatTensor(div=False),
        normalize,
    ])
    label_transform = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize([input_size, input_size],
                          interpolation=PIL.Image.NEAREST),
        transforms.ToTensor(),
    ])

    h5_f = h5py.File("./utils/val_label_binary_np.h5", 'r')

    for idx_img in range(len(test_list)):
        img = Image.open(test_list[idx_img]).convert('RGB')
        processed_img = img_transform(img).unsqueeze(0)
        processed_img = utils.check_gpu(None, processed_img)
        score_feats1, score_feats2, score_feats3, score_feats5, score_fuse_feats = model(
            processed_img, for_vis=True)

        # Load numpy from hdf5 for gt.
        np_data = h5_f['data/' +
                       ori_test_list[idx_img].replace('leftImg8bit', 'gtFine').
                       replace('/', '_').replace('.png', '_edge.npy')]
        label_data = []
        num_cls = np_data.shape[2]
        for k in range(num_cls):
            if np_data[:, :, num_cls - 1 - k].sum() > 0:
                label_tensor = label_transform(
                    torch.from_numpy(np_data[:, :, num_cls - 1 -
                                             k]).unsqueeze(0).float())
            else:  # ALL zeros, don't need transform, maybe a bit faster?..
Example #11
0
        normalize,
    ])
    label_transform = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize([input_size, input_size],
                          interpolation=PIL.Image.NEAREST),
        transforms.ToTensor(),
    ])

    # Loading gt label for visualization.
    h5_f = h5py.File("./utils/test_label_binary_np.h5", 'r')

    for idx_img in xrange(len(test_lst)):
        img = Image.open(test_lst[idx_img]).convert('RGB')
        processed_img = img_transform(img).unsqueeze(0)
        processed_img = utils.check_gpu(0, processed_img)
        score_feats1, score_feats2, score_feats3, score_feats5, score_fuse_feats = model(
            processed_img, for_vis=True)

        # Load numpy from hdf5 for gt.
        np_data = h5_f['data/' + ori_test_list[idx_img].replace(
            'image', 'label').replace('/', '_').replace('png', 'npy')]
        label_data = []
        num_cls = np_data.shape[2]
        for k in xrange(num_cls):
            if np_data[:, :, num_cls - 1 - k].sum() > 0:
                label_tensor = label_transform(
                    torch.from_numpy(np_data[:, :, num_cls - 1 -
                                             k]).unsqueeze(0).float())
            else:  # ALL zeros, don't need transform, maybe a bit faster?..
                label_tensor = torch.zeros(1, input_size, input_size).float()
                 RGB2BGR(roll=True),
                 ToTorchFormatTensor(div=False),
                 normalize,
                 ])
 
 for idx_img in xrange(len(test_lst)):
     img = Image.open(test_lst[idx_img]).convert('RGB')
     processed_img = img_transform(img).unsqueeze(0) # 1 X 3 X H X W
     height = processed_img.size()[2]
     width = processed_img.size()[3]
     if crop_size < height or crop_size < width:
         raise ValueError("Input image size must be smaller than crop size!")
     pad_h = crop_size - height
     pad_w = crop_size - width
     padded_processed_img = F.pad(processed_img, (0, pad_w, 0, pad_h), "constant", 0).data
     processed_img_var = utils.check_gpu(None, padded_processed_img) # change None to GPU Id if needed
     score_feats5, score_fuse_feats = model(processed_img_var) # 1 X 19 X CROP_SIZE X CROP_SIZE
     
     score_output = sigmoid(score_fuse_feats.transpose(1,3).transpose(1,2)).squeeze(0)[:height, :width, :] # H X W X 19
     for cls_idx in xrange(num_cls):
         # Convert binary prediction to uint8
         im_arr = np.empty((height, width), np.uint8)
         im_arr = (score_output[:,:,cls_idx].data.cpu().numpy())*255.0
         # print(im_arr)
          
         # Store value into img
         img_base_name_noext = os.path.splitext(os.path.basename(test_lst[idx_img]))[0]
         if not os.path.exists(os.path.join(args.output_dir, str(cls_idx))):
             os.makedirs(os.path.join(args.output_dir, str(cls_idx)))
         imwrite(os.path.join(args.output_dir, str(cls_idx), img_base_name_noext+'.png'), im_arr)
         print 'processed: '+test_lst[idx_img]