Example #1
0
def open_rgby(path, filename, train=True, stats=None):
    """Read an RGBY protein image as one (512, 512, 3) float32 array.

    Loads the per-channel grayscale PNGs named ``<filename>_<color>.png``
    from *path* (red/green/blue; the yellow channel is currently disabled),
    resizes each channel to 512x512 and scales pixel values to [0, 1].

    Parameters
    ----------
    path : str
        Directory containing the channel images.
    filename : str
        Image id; combined with the color suffix to build each file name.
    train : bool
        If True, apply random rotate / dihedral / lighting augmentation.
    stats : tuple or None
        (mean, std) for normalization.  NOTE: currently ignored — it is
        deliberately overwritten with None below, so no normalization runs.

    Returns
    -------
    np.ndarray of shape (512, 512, 3), float32, values in [0, 1].
    """
    # colors = ['red', 'green', 'blue', 'yellow']  # yellow channel disabled
    colors = ['red', 'green', 'blue']
    flags = cv2.IMREAD_GRAYSCALE
    img = [
        cv2.imread(os.path.join(path, filename + '_' + color + '.png'),
                   flags).astype(np.float32) for color in colors
    ]
    img = [cv2.resize(x, (512, 512)) / 255 for x in img]
    img = np.stack(img, axis=-1)
    # Deliberately disable normalization regardless of the caller's stats.
    stats = None
    if stats is not None:  # PEP 8 idiom: "is not None", not "not ... is None"
        m, s = stats
        img = transforms.Normalize(img, m, s)
    if train:
        img = transforms.RandomRotate(img, 30)
        img = transforms.RandomDihedral(img)
        img = transforms.RandomLighting(img, 0.05, 0.05)
    return img
Example #2
0
def main():
    """Entry point: parse CLI args, build datasets/model/scheduler, train."""
    cli = parser.parse_args()
    global_step = 0
    run_name = f'{cli.name}_{hp.max_lr}_{hp.cycle_length}'

    # Training pipeline: resize, rotate, crop, tensorize, normalize.
    train_tfms = segtrans.JointCompose([
        segtrans.Resize(400),
        segtrans.RandomRotate(0, 90),
        segtrans.RandomCrop(256, 256),
        segtrans.ToTensor(),
        segtrans.Normalize(mean=hp.mean, std=hp.std),
    ])
    # Validation pipeline: deterministic, no augmentation.
    eval_tfms = segtrans.JointCompose([
        segtrans.PadToFactor(),
        segtrans.ToTensor(),
        segtrans.Normalize(mean=hp.mean, std=hp.std),
    ])

    train_ds = DSBDataset(f'{cli.data}/train', transforms=train_tfms)
    val_ds = DSBDataset(f'{cli.data}/val', transforms=eval_tfms)

    model = Unet()

    # Optionally resume: restores weights, step counter and experiment name.
    if cli.checkpoint:
        state = torch.load(cli.checkpoint)
        model.load_state_dict(state['state'])
        global_step = state['step']
        run_name = state['exp_name']

    optimizer = Adam(model.parameters(), lr=hp.max_lr)

    # Either an LR-finder sweep or SGDR warm restarts, per the CLI flag.
    if cli.find_lr:
        scheduler = LRFinderScheduler(optimizer)
    else:
        scheduler = SGDRScheduler(optimizer, min_lr=hp.min_lr,
                                  max_lr=hp.max_lr,
                                  cycle_length=hp.cycle_length,
                                  current_step=global_step)

    model.cuda(device=cli.device)
    train(model, optimizer, scheduler, train_ds, val_ds,
          n_epochs=cli.epochs, batch_size=cli.batch_size,
          exp_name=run_name, device=cli.device, step=global_step)
Example #3
0
def train(model, criterion, optimizer, lr_scheduler, data_dir, lr):
    """Train and evaluate a classifier, checkpointing on best test accuracy.

    Runs NUM_EPOCHS epochs, each with a "train" and a "test" phase.
    After every phase the metric history is re-written as JSON to
    ``<SAVE_PATH>_history``; whenever test accuracy improves, the model
    weights are saved to ``<SAVE_PATH>_checkpoint``.

    NOTE(review): written against the pre-0.4 PyTorch API
    (``torch.autograd.Variable``, ``loss.data[0]``); it will not run
    unmodified on modern PyTorch.

    Parameters:
    -----------
    model: pytorch model
    criterion: loss function, called as ``criterion(outputs, labels)``
    optimizer: torch optimizer, stepped during the train phase
    lr_scheduler: callable ``(optimizer, epoch, lr) -> optimizer``,
        invoked at the start of every train phase
    data_dir: dataset root forwarded to ``make_datasets``
    lr: base learning rate forwarded to ``lr_scheduler``
    """
    time_at_start = time.time()
    best_acc = 0.0
    # Per-phase metric history; dumped to JSON after every phase below.
    history = {
        "train_acc": [],
        "train_loss": [],
        "test_acc": [],
        "test_loss": []
    }
    datasets = make_datasets(data_dir, transforms=transforms.RandomRotate())
    dataloader = make_dataloaders(datasets)
    for epoch in range(NUM_EPOCHS):
        print("Epoch {}/{}".format(epoch, NUM_EPOCHS - 1))
        print("=" * 10)
        for phase in ["train", "test"]:
            if phase == "train":
                # The scheduler may adjust (or rebuild) the optimizer per epoch.
                optimizer = lr_scheduler(optimizer, epoch, lr)
                model.train(True)
            else:
                model.train(False)  # eval mode: no dropout / BN updates
            print("Setting model to {} mode".format(phase))

            running_loss = 0.0
            running_corrects = 0
            len_data = len(datasets[phase])

            for index, data in enumerate(dataloader[phase]):
                # Third tuple element is unused in this function.
                inputs, labels, _ = data
                if USE_GPU:
                    # Legacy Variable wrapper (no-op on modern torch).
                    inputs = torch.autograd.Variable(inputs.cuda())
                    labels = torch.autograd.Variable(labels.cuda())
                else:
                    inputs = torch.autograd.Variable(inputs)
                    labels = torch.autograd.Variable(labels)

                # zero parameter gradients before the forward pass
                optimizer.zero_grad()

                outputs = model(inputs)
                _, preds = torch.max(outputs.data, 1)
                labels = labels.view(-1)  # flatten targets for the criterion
                loss = criterion(outputs, labels)

                # Backprop and step only while training; test is forward-only.
                if phase == "train":
                    loss.backward()
                    optimizer.step()

                # loss.data[0]: legacy (pre-0.4) scalar extraction.
                running_loss += loss.data[0]
                running_corrects += torch.sum(preds == labels.data)
                if VERBOSE:
                    print_model_stats(index, BATCH_SIZE, len_data,
                                      loss.data[0])
            print("\n")

            # epoch stats
            epoch_loss = running_loss / len_data
            # NOTE(review): assumes torch.sum returns a Python number (true on
            # legacy torch); on modern torch this is a tensor and would break
            # the json.dump below — confirm before upgrading.
            epoch_acc = running_corrects / len_data
            history["{}_acc".format(phase)].append(epoch_acc)
            history["{}_loss".format(phase)].append(epoch_loss)
            print("{} Loss: {:.4f} | Acc: {:.4f}".format(
                phase, epoch_loss, epoch_acc))

            # checkpoint the best model based on validation accuracy
            if phase == "test" and epoch_acc > best_acc:
                best_acc = epoch_acc
                model_path = "{}_checkpoint".format(SAVE_PATH)
                print("checkpointing model at {}".format(model_path))
                torch.save(model.state_dict(), model_path)

            # write history to JSON file (overwritten after every phase)
            history_path = "{}_history".format(SAVE_PATH)
            with open(history_path, "w") as f:
                json.dump(history, f, indent=4)

    time_elapsed = time.time() - time_at_start
    print("Training complete in {:.0f}m {:.0f}s".format(
        time_elapsed // 60, time_elapsed % 60))
    print("Best validation accuracy: {:.4f}".format(best_acc))
def train_model(model, criterion, optimizer, lr_scheduler):
    """Train on the CSV-defined training set, then print test predictions.

    Trains for NUM_EPOCHS epochs (training phase only), dumping loss/acc
    history to ``<SAVE_PATH>_history`` as JSON each epoch.  Afterwards the
    model is put in eval mode and per-sample test predictions are printed
    alongside the parent image id and true label, so aggregate per-image
    predictions can be computed downstream (group by 'img_id' in the CSV).

    NOTE(review): uses the pre-0.4 PyTorch API (``Variable``,
    ``loss.data[0]``); it will not run unmodified on modern PyTorch.
    """
    history = {"train_acc": [], "train_loss": [],
               "test_acc": [], "test_loss": []}
    transform = transforms.RandomRotate()

    datasets = {}
    train_data = dataset.CSVDataset(
        data_dir=DATA_DIR, csv=TRAIN_CSV,
        complete_csv=DATAFRAME, transforms=transform
    )
    # Balance class sizes by "MoA" group via over-sampling (not under-).
    train_data.equalise_groups("MoA", under_sample=False)
    datasets["train"] = train_data
    # Test set: no augmentation and no group equalisation.
    datasets["test"] = dataset.CSVDataset(
        data_dir=DATA_DIR, csv=TEST_CSV, complete_csv=DATAFRAME
    )

    dataloader = {}
    dataloader["train"] = torch.utils.data.DataLoader(
        datasets["train"], batch_size=BATCH_SIZE,
        shuffle=True, num_workers=NUM_WORKERS, pin_memory=True
    )
    dataloader["test"] = torch.utils.data.DataLoader(
        datasets["test"], batch_size=BATCH_SIZE,
        shuffle=False, num_workers=NUM_WORKERS, pin_memory=True
    )

    for epoch in range(NUM_EPOCHS):
        print("Epoch {}/{}".format(epoch, NUM_EPOCHS-1))
        print("="*10)
        # each epoch has training and validation phases
        # NOTE(review): despite the comment above, only training runs here;
        # testing happens once, after the epoch loop.
        optimizer = lr_scheduler(optimizer, epoch)
        model.train(True)

        running_loss = 0.0
        running_corrects = 0
        len_data = len(datasets["train"])

        for data in tqdm(dataloader["train"]):
            # ignore parent_img labels during training, these are only needed in testing
            inputs, labels, _ = data
            if USE_GPU:
                inputs = torch.autograd.Variable(inputs).cuda()
                labels = torch.autograd.Variable(labels).cuda()
            else:
                inputs = torch.autograd.Variable(inputs)
                labels = torch.autograd.Variable(labels)

            # zero the parameter gradients before the forward pass
            optimizer.zero_grad()

            # forward pass
            outputs  = model(inputs)
            _, preds = torch.max(outputs.data, 1)
            labels   = labels.view(-1)
            loss     = criterion(outputs, labels)

            # backprop if in the training phase
            loss.backward()
            optimizer.step()

            # loss.data[0]: legacy (pre-0.4) scalar extraction.
            running_loss += loss.data[0]
            running_corrects += torch.sum(preds == labels.data)
        print("\n")

        # epoch stats for train and validation phases
        epoch_loss = running_loss / len_data
        epoch_acc  = running_corrects / len_data
        history["train_acc"].append(epoch_acc)
        history["train_loss"].append(epoch_loss)
        print("Loss: {:.4f} | Acc: {:.4f}".format(epoch_loss, epoch_acc))

        # write history dict as a JSON file at each epoch
        history_path = "{}_history".format(SAVE_PATH)
        with open(history_path, "w") as f:
            json.dump(history, f, indent=4)

    # Convert model to test mode and make predictions on test set
    # these need to be recorded so that the aggregate prediction
    # on all the cells in an image can be calculated.
    #
    # This can be done by grouping on the 'img_id' column in the .csv file
    print("\n")
    print("=" * 10)
    print("Testing")
    print("=" * 10)
    model.eval()
    parent_imgs = []
    predictions_list = []
    actual_vals = []
    for data in tqdm(dataloader["test"]):
        test_inputs, test_labels, parent_img = data
        test_labels = list(test_labels.cpu().numpy())
        if USE_GPU:
            test_inputs = torch.autograd.Variable(test_inputs).cuda()
        test_outputs = model(test_inputs)
        _, predictions = torch.max(test_outputs.data, 1)
        predictions = list(predictions.cpu().numpy())
        # NOTE(review): parent_imgs uses append (list of batches) while the
        # other two use extend (flat lists) — the zip below therefore pairs
        # a whole batch of ids with single predictions; confirm intent.
        parent_imgs.append(parent_img)
        predictions_list.extend(predictions)
        actual_vals.extend(test_labels)
    for i, j, k in zip(parent_imgs, predictions_list, actual_vals):
        print(i, j, k)
Example #5
0
        return image, label

    def __len__(self):
        """Return the dataset size, after checking that the image, mask
        and box collections all have the same number of entries."""
        # NOTE(review): asserts are stripped under ``python -O``; raise an
        # exception instead if this consistency check must always run.
        assert self.trainDataSize == self.maskDataSize
        assert self.trainDataSize == self.dataBoxSize

        return self.trainDataSize


if __name__ == '__main__':
    from torch.utils.data import DataLoader

    transforms = [
        # Transforms.RandomCrop(2300, 2300),
        Transforms.RondomFlip(),
        Transforms.RandomRotate(15),
        Transforms.Log(0.5),
        Transforms.Blur(0.2),
        Transforms.ToTensor(),
        Transforms.ToGray()
    ]
    dataset = UNetDataset('./data/train',
                          './data/train_cleaned',
                          transform=transforms)
    dataLoader = DataLoader(dataset=dataset,
                            batch_size=32,
                            shuffle=True,
                            num_workers=0)

    for index, (batch_x, batch_y) in enumerate(dataLoader):
        print(batch_x.size(),
Example #6
0
def train():
    """Train the denoising U-Net on ./data/train, checkpointing every 10 epochs."""
    # Input pipeline: grayscale, flip/rotate augmentation, 48x48 crops,
    # log transform, then tensors.
    augmentations = [
        Transforms.ToGray(),
        Transforms.RondomFlip(),
        Transforms.RandomRotate(15),
        Transforms.RandomCrop(48, 48),
        Transforms.Log(0.5),
        # Transforms.EqualizeHist(0.5),
        # Transforms.Blur(0.2),
        Transforms.ToTensor()
    ]
    train_set = UNetDataset('./data/train/',
                            './data/train_cleaned/',
                            transform=augmentations)
    loader = DataLoader(dataset=train_set,
                        batch_size=BATCH_SIZE,
                        shuffle=True,
                        num_workers=0)

    # Model, optimizer and loss.
    net = UNet(1, 2).to(device)
    optimizer = optim.Adam(net.parameters(), lr=LR)
    loss_func = nn.CrossEntropyLoss().to(device)

    # Resume from a saved checkpoint when one exists on disk.
    if os.path.exists(weight_with_optimizer):
        checkpoint = torch.load(weight_with_optimizer)
        net.load_state_dict(checkpoint['net'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        print('load weight')

    for epoch in range(EPOCH):
        for step, (batch_x, batch_y) in enumerate(loader):
            batch_x = batch_x.to(device)
            batch_y = batch_y.squeeze(1).to(device)
            output = net(batch_x)
            loss = loss_func(output, batch_y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # Reports the loss of the final batch of the epoch only.
        print('epoch: %d | loss: %.4f' % (epoch, loss.data.cpu()))

        # Persist weights (with and without optimizer state) every 10 epochs.
        if (epoch + 1) % 10 == 0:
            torch.save(
                {
                    'net': net.state_dict(),
                    'optimizer': optimizer.state_dict()
                }, weight_with_optimizer)
            torch.save({'net': net.state_dict()}, weight)
            print('saved')
Example #7
0
def get_dataloaders(data_location, labels_file, modalities,
                    wsi_patch_size=None, n_wsi_patches=None, batch_size=None,
                    exclude_patients=None, return_patient_id=False):
    """Instantiate PyTorch DataLoaders for the requested data modalities.

    Parameters
    ----------
    data_location : str
        Root directory holding one subdirectory per modality.
    labels_file : str
        Passed to ``get_label_map`` to build the train/val/test label maps.
    modalities : iterable of str
        Subset of {'clinical', 'wsi', 'mRNA', 'miRNA', 'DNAm', 'CNV'}.
    wsi_patch_size : int, optional
        Side length of the center crop applied to WSI patches.
    n_wsi_patches : int, optional
        Number of WSI patches per patient; > 1 lowers the default batch size.
    batch_size : int, optional
        Training batch size; derived from the modalities when None.
    exclude_patients : iterable, optional
        Patient ids dropped from every split.
    return_patient_id : bool
        Forwarded to the dataset: include patient ids in each sample.

    Returns
    -------
    Dict of PyTorch DataLoaders keyed by 'train', 'val', 'test'.
    """
    data_dirs = {
        'clinical': os.path.join(data_location, 'Clinical'),
        'wsi': os.path.join(data_location, 'WSI'),
        'mRNA': os.path.join(data_location, 'RNA-seq'),
        'miRNA': os.path.join(data_location, 'miRNA-seq'),
        'DNAm': os.path.join(data_location, 'DNAm/5k'),
        'CNV': os.path.join(data_location, 'CNV'),
    }

    data_dirs = {mod: data_dirs[mod] for mod in modalities}
    if batch_size is None:
        # Multi-patch WSI inputs are heavy, so default to a smaller batch.
        # The explicit None check guards against a TypeError when 'wsi' is
        # requested without a patch count (None > 1 raises in Python 3).
        if 'wsi' in data_dirs and n_wsi_patches is not None and n_wsi_patches > 1:
            batch_size = 2**5
        else:
            batch_size = 2**7

    patient_labels = {'train': get_label_map(labels_file, 'train'),
                      'val': get_label_map(labels_file, 'val'),
                      'test': get_label_map(labels_file, 'test')}

    if 'wsi' in data_dirs:
        transforms = {
            # Training: color jitter + random rotate/flip augmentation.
            'train': torchvision.transforms.Compose([
                patch_transforms.ToPIL(),
                torchvision.transforms.CenterCrop(wsi_patch_size),
                torchvision.transforms.ColorJitter(
                    brightness=64/255, contrast=0.5, saturation=0.25,
                    hue=0.04),
                patch_transforms.ToNumpy(),
                patch_transforms.RandomRotate(),
                patch_transforms.RandomFlipUpDown(),
                patch_transforms.ToTensor(),
            ]),
            # No data augmentation for validation
            'val': torchvision.transforms.Compose([
                patch_transforms.ToPIL(),
                torchvision.transforms.CenterCrop(wsi_patch_size),
                patch_transforms.ToNumpy(),
                patch_transforms.ToTensor(),
            ]),
            # No data augmentation for test either.
            'test': torchvision.transforms.Compose([
                patch_transforms.ToPIL(),
                torchvision.transforms.CenterCrop(wsi_patch_size),
                patch_transforms.ToNumpy(),
                patch_transforms.ToTensor(),
        ])}
    else:
        transforms = {'train': None, 'val': None, 'test': None}

    datasets = {x: dataset.MultimodalDataset(
        label_map=patient_labels[x],
        data_dirs=data_dirs,
        n_patches=n_wsi_patches,
        patch_size=wsi_patch_size,
        transform=transforms[x],
        exclude_patients=exclude_patients,
        return_patient_id=return_patient_id)
                for x in ['train', 'val', 'test']}

    print('Data modalities:')
    for mod in modalities:
        print('  ', mod)
    print()
    print('Dataset sizes (# patients):')
    for x in datasets.keys():
        print(f'   {x}: {len(datasets[x])}')
    print()
    print('Batch size:', batch_size)

    # Use "drop_last=True" to drop the last incomplete batch
    # to avoid undefined loss values due to lack of sufficient
    # orderable observation pairs caused by data censorship
    # When running all data with batch = 64:
    #    8880 % 64 = 48
    # When running 20 cancer data with batch = 64:
    #    7369 % 64 = 9

    # Val/test use double the batch size: no gradients are held in memory.
    dataloaders = {'train': torch.utils.data.DataLoader(
        datasets['train'], batch_size=batch_size,
        shuffle=True, num_workers=4, drop_last=True),
                   'val': torch.utils.data.DataLoader(
        datasets['val'], batch_size=batch_size * 2,
        shuffle=False, num_workers=4, drop_last=True),
                   'test': torch.utils.data.DataLoader(
        datasets['test'], batch_size=batch_size * 2,
        shuffle=False, num_workers=4, drop_last=True)}

    return dataloaders
Example #8
0
def train():
    """Train a U-Net, logging losses to TensorBoard and saving checkpoints."""
    # One augmentation pipeline, shared by the train and validation sets.
    pipeline = [
        Transforms.RondomFlip(),
        Transforms.RandomRotate(15),
        Transforms.Log(0.5),
        Transforms.Blur(0.2),
        Transforms.ToGray(),
        Transforms.ToTensor()
    ]
    train_set = UNetDataset('./data/train/',
                            './data/train_cleaned/',
                            transform=pipeline)
    train_loader = DataLoader(dataset=train_set,
                              batch_size=config.BATCH_SIZE,
                              shuffle=True,
                              num_workers=0)

    valid_set = UNetDataset('./data/valid/',
                            './data/valid_cleaned/',
                            transform=pipeline)
    valid_loader = DataLoader(dataset=valid_set,
                              batch_size=config.BATCH_SIZE,
                              shuffle=True,
                              num_workers=0)

    net = UNet(n_channels=config.n_channels,
               n_classes=config.n_classes).to(config.device)
    writer = SummaryWriter()
    optimizer = optim.Adam(net.parameters(), lr=config.LR)
    # Multi-class segmentation -> cross-entropy; binary -> BCE with logits.
    if config.n_classes > 1:
        loss_func = nn.CrossEntropyLoss().to(config.device)
    else:
        loss_func = nn.BCEWithLogitsLoss().to(config.device)
    best_loss = float('inf')

    # Resume from a checkpoint when one exists.
    if os.path.exists(config.weight_with_optimizer):
        checkpoint = torch.load(config.weight_with_optimizer,
                                map_location='cpu')
        net.load_state_dict(checkpoint['net'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        print('load weight')

    for epoch in range(config.EPOCH):
        train_loss = 0
        net.train()
        for _, (inputs, targets) in enumerate(train_loader):
            inputs = inputs.to(device=config.device)
            targets = targets.squeeze(1).to(device=config.device)
            prediction = net(inputs)
            loss = loss_func(prediction, targets)
            train_loss += loss.item()
            # Snapshot weights whenever a new best per-batch loss appears.
            if loss < best_loss:
                best_loss = loss
                torch.save(
                    {
                        'net': net.state_dict(),
                        'optimizer': optimizer.state_dict()
                    }, config.best_model_with_optimizer)
                torch.save({'net': net.state_dict()}, config.best_model)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        net.eval()
        eval_loss = 0
        for _, (inputs, targets) in enumerate(valid_loader):
            inputs = inputs.to(device=config.device)
            targets = targets.squeeze(1).to(device=config.device)
            prediction = net(inputs)
            eval_loss += loss_func(prediction, targets).item()

        writer.add_scalar("train_loss", train_loss, epoch)
        writer.add_scalar("eval_loss", eval_loss, epoch)
        print("*" * 80)
        print('epoch: %d | train loss: %.4f | valid loss: %.4f' %
              (epoch, train_loss, eval_loss))
        print("*" * 80)

        # Periodic checkpoint (with and without optimizer) every 10 epochs.
        if (epoch + 1) % 10 == 0:
            torch.save(
                {
                    'net': net.state_dict(),
                    'optimizer': optimizer.state_dict()
                }, config.weight_with_optimizer)
            torch.save({'net': net.state_dict()}, config.weight)
            print('saved')

    writer.close()
Example #9
0
        weights[foreground] = n * self.fg_weight / foreground.sum().item()

        mask = mask * weights

        img = torch.cat([img, self.mean[None]], dim=0)

        if self.flip:
            img, mask, lbl = random_flip(img, mask, lbl)

        return img, mask, lbl


# 1024x768 aspect-preserving resize, 88px padding, then a random rotation
# (augmentation variant).
ROTATE_TRANS_1024 = tr.Compose([
    tr.AspectPreservingResizeTransform((1024, 768)),
    tr.Lift(T.Pad(88)),
    tr.RandomRotate(),
])

# Same geometry as above but without the rotation (deterministic variant).
PAD_TRANS_1024 = tr.Compose([
    tr.AspectPreservingResizeTransform((1024, 768)),
    tr.Lift(T.Pad(88)),
])

# ISICDataset wrapped for rotated sampling.
# NOTE(review): rotated_dataset is defined elsewhere — confirm its contract.
RotatedISICDataset = rotated_dataset(ISICDataset)

if __name__ == '__main__':
    target_size = 1024, 768

    img_transform = T.Compose([T.ColorJitter(0.3, 0.3, 0.3, 0.), T.ToTensor()])

    d = ISICDataset('/home/jatentaki/Storage/jatentaki/Datasets/isic2018',
Example #10
0
    'print_aux': True,
}

# Checkpoint directory and experiment name used when saving/restoring.
ckpt_path = './ckpt'
exp_name = 'model'

# Image-only transform: tensorize + per-channel normalization with
# dataset-specific statistics.
img_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.3598, 0.3653, 0.3662], [0.2573, 0.2663, 0.2756])
])
# Masks become integer label tensors (no scaling / normalization).
mask_transform = extend_transforms.MaskToTensor()

# Joint image+mask augmentation: random scale, ratio-constrained resize,
# rotation by args['rotate_degree'], then crop to the training size.
train_joint_transform = extend_transforms.Compose([
    extend_transforms.RandomScale(),
    extend_transforms.RandomSizedRatio(760, 842, 274, 304),
    extend_transforms.RandomRotate(args['rotate_degree']),
    extend_transforms.RandomCrop(args['train_crop_size']),
])

train_set = culane.CULANE('train',
                          joint_transform=train_joint_transform,
                          transform=img_transform,
                          mask_transform=mask_transform)
train_loader = DataLoader(train_set,
                          batch_size=args['train_batch_size'],
                          num_workers=10,
                          shuffle=True)

criterion = CrossEntropyLoss2d(weight=torch.Tensor([0.4, 1, 1, 1, 1]).cuda(),
                               size_average=True,
                               ignore_index=culane.ignore_label,
Example #11
0
        print("Ignoring --epochs outside of training mode")

    if args.no_jit and args.optimize:
        print("Ignoring --optimize in --no-jit setting")

    writer.add_text('general', str(vars(args)))

    transform = T.Compose([T.CenterCrop(644), T.ToTensor()])

    # if we are not padding the convolutions, we have to pad the input
    aug_pad = None if args.padding else tr.Lift(T.Pad(40))

    test_global_transform = aug_pad

    tr_global_transform = tr.Compose(
        [tr.RandomRotate(), tr.RandomFlip(), aug_pad])

    train_data = loader.DriveDataset(args.data_path,
                                     training=True,
                                     bloat=args.bloat,
                                     from_=args.cut,
                                     img_transform=transform,
                                     mask_transform=transform,
                                     label_transform=transform,
                                     global_transform=tr_global_transform)
    train_loader = DataLoader(train_data,
                              batch_size=args.batch_size,
                              shuffle=True,
                              num_workers=args.workers)

    if args.test_on_train: