Example #1
def train_model(
        model,
        criterion,
        optimizer,
        LR,
        num_epochs,
        dataloaders,
        dataset_sizes,
        weight_decay,
        weighted_cross_entropy_batchwise=False,
        fine_tune=False,
        regression=False):
    """
    Fine tunes torchvision model to NIH CXR data.

    Args:
        model: torchvision model to be finetuned (densenet-121 in this case)
        criterion: loss criterion (binary cross entropy loss, BCELoss)
        optimizer: optimizer to use in training (SGD)
        LR: learning rate
        num_epochs: continue training up to this many epochs
        dataloaders: pytorch train and val dataloaders
        dataset_sizes: length of train and val datasets
        weight_decay: weight decay parameter we use in SGD with momentum
        weighted_cross_entropy_batchwise: if True, rebuild BCEWithLogitsLoss each batch
            with a pos_weight computed from that batch's label balance
        fine_tune: if True, train on the three detector labels instead of the 14 NIH labels
        regression: if True, treat the single model output as a regression target
    Returns:
        model: trained torchvision model
        best_epoch: epoch on which best model val loss was obtained
    """
    since = time.time()

    start_epoch = 1
    best_loss = 999999
    best_epoch = -1
    last_train_loss = -1

    tensorboard_writer_train = SummaryWriter('runs/loss/train_loss')
    tensorboard_writer_val = SummaryWriter('runs/loss/val_loss')

    if not fine_tune:
        PRED_LABEL = [
            'Atelectasis',
            'Cardiomegaly',
            'Effusion',
            'Infiltration',
            'Mass',
            'Nodule',
            'Pneumonia',
            'Pneumothorax',
            'Consolidation',
            'Edema',
            'Emphysema',
            'Fibrosis',
            'Pleural_Thickening',
            'Hernia']
    else:
        PRED_LABEL = [
            'Detector01',
            'Detector2',
            'Detector3']

    if not regression:
        tensorboard_writer_auc = {}
        tensorboard_writer_AP = {}
        for label in PRED_LABEL:
            tensorboard_writer_auc[label] = SummaryWriter('runs/auc/'+label)
            tensorboard_writer_AP[label] = SummaryWriter('runs/ap/' + label)
    else:
        tensorboard_writer_mae = SummaryWriter('runs/mae')

    # iterate over epochs
    for epoch in range(start_epoch, num_epochs + 1):
        print('Epoch {}/{}'.format(epoch, num_epochs))
        print('-' * 10)

        # set model to train or eval mode based on whether we are in train or
        # val; necessary to get correct predictions given batchnorm
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train(True)
            else:
                model.train(False)

            running_loss = 0.0

            total_done = 0

            for data in dataloaders[phase]:
                if not regression:
                    inputs, labels, _ = data
                else:
                    inputs, ground_truths, _ = data
                batch_size = inputs.shape[0]
                inputs = inputs.to(device)
                if not regression:
                    labels = (labels.to(device)).float()
                else:
                    ground_truths = (ground_truths.to(device)).float()

                with torch.set_grad_enabled(phase == 'train'):

                    outputs = model(inputs)

                    # calculate gradient and update parameters in train phase
                    optimizer.zero_grad()

                    if weighted_cross_entropy_batchwise:
                        beta = pos_neg_weights_in_batch(labels)
                        criterion = nn.BCEWithLogitsLoss(pos_weight=beta)

                    if not regression:
                        loss = criterion(outputs, labels)
                    else:
                        ground_truths = ground_truths.unsqueeze(1)
                        loss = criterion(outputs, ground_truths)

                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                    running_loss += loss.item() * batch_size

            epoch_loss = running_loss / dataset_sizes[phase]

            if phase == 'train':
                tensorboard_writer_train.add_scalar('Loss', epoch_loss, epoch)
                last_train_loss = epoch_loss
            elif phase == 'val':
                tensorboard_writer_val.add_scalar('Loss', epoch_loss, epoch)

                if not regression:
                    preds, aucs = E.make_pred_multilabel(dataloaders['val'], model, save_as_csv=False, fine_tune=fine_tune)
                    aucs.set_index('label', inplace=True)
                    print(aucs)
                    for label in PRED_LABEL:
                        tensorboard_writer_auc[label].add_scalar('AUC', aucs.loc[label, 'auc'], epoch)
                        tensorboard_writer_AP[label].add_scalar('AP', aucs.loc[label, 'AP'], epoch)
                else:
                    mae, _, _ = E.evaluate_mae(dataloaders['val'], model)
                    print('MAE: ', mae)
                    tensorboard_writer_mae.add_scalar('MAE', mae, epoch)

            print(phase + ' epoch {}: loss {:.4f} with data size {}'.format(
                epoch, epoch_loss, dataset_sizes[phase]))

            # checkpoint model if has best val loss yet
            if phase == 'val' and epoch_loss < best_loss:
                best_loss = epoch_loss
                best_epoch = epoch
                if not fine_tune:
                    checkpoint(model, best_loss, epoch, LR, filename='checkpoint_best')
                elif fine_tune and not regression:
                    checkpoint(model, best_loss, epoch, LR, filename='classification_checkpoint_best')
                else:
                    checkpoint(model, best_loss, epoch, LR, filename='regression_checkpoint_best')

        # log training and validation loss over each epoch
        with open("results/log_train", 'a') as logfile:
            logwriter = csv.writer(logfile, delimiter=',')
            if epoch == 1:
                logwriter.writerow(["epoch", "train_loss", "val_loss"])
            logwriter.writerow([epoch, last_train_loss, epoch_loss])

        # Save model after each epoch
        # checkpoint(model, best_loss, epoch, LR, filename='checkpoint')

        total_done += batch_size
        if total_done % (100 * batch_size) == 0:
            print("completed " + str(total_done) + " so far in epoch")

        # print elapsed time from the beginning after each epoch
        print('Training complete in {:.0f}m {:.0f}s'.format(
            (time.time() - since) // 60, (time.time() - since) % 60))

    # total time
    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))

    # load best model weights to return
    if not fine_tune:
        checkpoint_best = torch.load('results/checkpoint_best')
    elif fine_tune and not regression:
        checkpoint_best = torch.load('results/classification_checkpoint_best')
    else:
        checkpoint_best = torch.load('results/regression_checkpoint_best')
    model = checkpoint_best['model']
    return model, best_epoch
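
The loop above rebuilds the criterion from pos_neg_weights_in_batch when weighted_cross_entropy_batchwise is set, but that helper is not shown in this example. A minimal sketch of what it might compute, assuming labels is the (batch_size, num_labels) 0/1 float tensor used in the loop; the actual implementation may differ.

def pos_neg_weights_in_batch(labels):
    # Hypothetical sketch: per-label pos_weight derived from a single batch.
    # Assumes labels is a (batch_size, num_labels) 0/1 float tensor.
    positives = labels.sum(dim=0)                    # positive count per label
    negatives = labels.shape[0] - positives          # negative count per label
    # clamp avoids division by zero when a label has no positive example in the batch
    pos_weight = negatives / positives.clamp(min=1.0)
    return pos_weight.to(labels.device)

BCEWithLogitsLoss broadcasts the returned (num_labels,) vector over the batch, so labels with few positives in the batch get proportionally larger weight.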
Example #2
def train_cnn(PATH_TO_IMAGES, LR, WEIGHT_DECAY):
    """
    Train torchvision model to NIH data given high level hyperparameters.

    Args:
        PATH_TO_IMAGES: path to NIH images
        LR: learning rate
        WEIGHT_DECAY: weight decay parameter for SGD

    Returns:
        preds: torchvision model predictions on test fold with ground truth for comparison
        aucs: AUCs for each train,test tuple

    """
    NUM_EPOCHS = 100  #100
    BATCH_SIZE = 32  #16

    try:
        rmtree('results/')
    except BaseException:
        pass  # directory doesn't yet exist, no need to clear it
    os.makedirs("results/")

    # use imagenet mean,std for normalization
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]

    N_LABELS = 15  # we are predicting 15 labels. Originally 14 before adding Covid

    # load labels
    df = pd.read_csv("nih_labels_modified.csv", index_col=0)

    # define torchvision transforms
    data_transforms = {
        'train':
        transforms.Compose([
            transforms.RandomHorizontalFlip(),
            transforms.Resize(224),  #was transforms.Scale
            # because scale doesn't always give 224 x 224, this ensures 224 x
            # 224
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean, std)
        ]),
        'val':
        transforms.Compose([
            transforms.Resize(224),  #was transforms.Scale
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean, std)
        ]),
    }

    # create train/val dataloaders
    transformed_datasets = {}
    transformed_datasets['train'] = CXR.CXRDataset(
        path_to_images=PATH_TO_IMAGES,
        fold='train',
        transform=data_transforms['train'])
    transformed_datasets['val'] = CXR.CXRDataset(
        path_to_images=PATH_TO_IMAGES,
        fold='val',
        transform=data_transforms['val'])

    dataloaders = {}
    dataloaders['train'] = torch.utils.data.DataLoader(
        transformed_datasets['train'],
        batch_size=BATCH_SIZE,
        shuffle=True,
        num_workers=8)
    dataloaders['val'] = torch.utils.data.DataLoader(
        transformed_datasets['val'],
        batch_size=BATCH_SIZE,
        shuffle=True,
        num_workers=8)

    # please do not attempt to train without GPU as will take excessively long
    if not use_gpu:
        raise ValueError("Error, requires GPU")
    model = models.densenet121(pretrained=True)
    num_ftrs = model.classifier.in_features
    # add final layer with # outputs in same dimension of labels with sigmoid
    # activation
    model.classifier = nn.Sequential(nn.Linear(num_ftrs, N_LABELS),
                                     nn.Sigmoid())

    # put model on GPU
    model = model.cuda()

    # define criterion, optimizer for training
    criterion = nn.BCELoss()
    optimizer = optim.SGD(filter(lambda p: p.requires_grad,
                                 model.parameters()),
                          lr=LR,
                          momentum=0.9,
                          weight_decay=WEIGHT_DECAY)
    dataset_sizes = {x: len(transformed_datasets[x]) for x in ['train', 'val']}

    # train model
    model, best_epoch = train_model(model,
                                    criterion,
                                    optimizer,
                                    LR,
                                    num_epochs=NUM_EPOCHS,
                                    dataloaders=dataloaders,
                                    dataset_sizes=dataset_sizes,
                                    weight_decay=WEIGHT_DECAY)

    # get preds and AUCs on test fold
    preds, aucs = E.make_pred_multilabel(data_transforms, model,
                                         PATH_TO_IMAGES)

    return preds, aucs
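
A hedged usage sketch for train_cnn above; the image path and hyperparameter values are placeholders, not the authors' settings.

if __name__ == '__main__':
    # hypothetical entry point with placeholder values
    PATH_TO_IMAGES = '/path/to/nih/images'
    LR = 0.01
    WEIGHT_DECAY = 1e-4

    preds, aucs = train_cnn(PATH_TO_IMAGES, LR, WEIGHT_DECAY)
    print(aucs)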
Example #3
def train_full(PATH_TO_IMAGES, LR, WEIGHT_DECAY):
    """
    Train torchvision model to NIH data given high level hyperparameters.

    Args:
        PATH_TO_IMAGES: path to NIH images
        LR: learning rate
        WEIGHT_DECAY: weight decay parameter for SGD

    Returns:
        preds: torchvision model predictions on test fold with ground truth for comparison
        aucs: AUCs for each train,test tuple

    """

    #==========================================
    # Initialization
    #==========================================
    NUM_EPOCHS = 100
    BATCH_SIZE = 16

    try:
        rmtree('results/')
    except BaseException:
        pass  # directory doesn't yet exist, no need to clear it
    os.makedirs("results/")

    # use imagenet mean,std for normalization
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]

    # 0 - Does not have pneumonia, and 1 - positive for pneumonia
    N_LABELS = 2

    #==========================================
    # Load labels
    #==========================================
    print("- Loading Data")
    df = pd.read_csv("rsna_labels.csv", index_col=0)

    #==========================================
    # Define torchvision transforms
    #==========================================
    print("- Transforming Images")
    data_transforms = {
        'train':
        transforms.Compose([
            transforms.RandomHorizontalFlip(),
            transforms.Resize(224),  # was transforms.Scale, removed in newer torchvision
            # because resize doesn't always give 224 x 224, the CenterCrop below ensures
            # 224 x 224
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean, std)
        ]),
        'val':
        transforms.Compose([
            transforms.Resize(224),  # was transforms.Scale, removed in newer torchvision
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean, std)
        ]),
    }

    #==========================================
    # Create train/val dataloaders
    #==========================================
    transformed_datasets = {}
    transformed_datasets['train'] = CXR.RSNA_Dataset(
        path_to_images=PATH_TO_IMAGES,
        mode='train',
        transform=data_transforms['train'])
    transformed_datasets['val'] = CXR.RSNA_Dataset(
        path_to_images=PATH_TO_IMAGES,
        mode='val',
        transform=data_transforms['val'])

    dataloaders = {}
    dataloaders['train'] = torch.utils.data.DataLoader(
        transformed_datasets['train'],
        batch_size=BATCH_SIZE,
        shuffle=True,
        num_workers=8)
    dataloaders['val'] = torch.utils.data.DataLoader(
        transformed_datasets['val'],
        batch_size=BATCH_SIZE,
        shuffle=True,
        num_workers=8)

    # please do not attempt to train without GPU as will take excessively long
    if not use_gpu:
        raise ValueError("Error, requires GPU")
    model = models.densenet121(pretrained=False)

    #==========================================
    # Loading cheXnet weights
    #==========================================
    PATH_TO_MODEL = "pretrained/checkpoint"
    checkpoint = torch.load(PATH_TO_MODEL,
                            map_location=lambda storage, loc: storage)
    model = checkpoint['model']
    del checkpoint
    model.cpu()

    #==========================================
    # Adapting the last layer and adding metadata
    #==========================================
    # calculating the input of the new layer
    num_ftrs = model.classifier[0].in_features

    # switching the classifier for one with only 2 classes

    model.classifier = nn.Sequential(nn.Linear(num_ftrs, N_LABELS),
                                     nn.Softmax(dim=1))  # explicit dim avoids the implicit-dim warning

    #==========================================
    # Put model on GPU
    #==========================================
    model = model.cuda()

    #==========================================
    # Define criterion, optimizer for training
    #==========================================
    criterion = nn.BCELoss()
    optimizer = optim.SGD(filter(lambda p: p.requires_grad,
                                 model.parameters()),
                          lr=LR,
                          momentum=0.9,
                          weight_decay=WEIGHT_DECAY)
    dataset_sizes = {x: len(transformed_datasets[x]) for x in ['train', 'val']}

    #==========================================
    # Train model
    #==========================================
    model, best_epoch = train_model(model,
                                    criterion,
                                    optimizer,
                                    LR,
                                    num_epochs=NUM_EPOCHS,
                                    dataloaders=dataloaders,
                                    dataset_sizes=dataset_sizes,
                                    weight_decay=WEIGHT_DECAY)

    #==========================================
    # Get preds and AUCs on test fold
    #==========================================
    preds, aucs = E.make_pred_multilabel(data_transforms, model,
                                         PATH_TO_IMAGES)

    return preds, aucs, model
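
Example #1 saves models with checkpoint(model, best_loss, epoch, LR, filename=...), and both that example and this one reload them as torch.load(...)['model'], but the helper itself is not shown. A minimal sketch consistent with that pattern, assuming it writes under results/; the real helper may store additional fields.

import torch

def checkpoint(model, best_loss, epoch, LR, filename='checkpoint_best'):
    # Hypothetical sketch: save the whole model object plus a little metadata,
    # matching the torch.load(...)['model'] reload pattern used above.
    state = {
        'model': model,
        'best_loss': best_loss,
        'epoch': epoch,
        'LR': LR,
    }
    torch.save(state, 'results/' + filename)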
Example #4
def train_cnn(PATH_TO_IMAGES, LR, WEIGHT_DECAY, fine_tune=False, regression=False, freeze=False, adam=False,
              initial_model_path=None, initial_brixia_model_path=None, weighted_cross_entropy_batchwise=False,
              modification=None, weighted_cross_entropy=False):
    """
    Train torchvision model to NIH data given high level hyperparameters.

    Args:
        PATH_TO_IMAGES: path to NIH images
        LR: learning rate
        WEIGHT_DECAY: weight decay parameter for SGD
        fine_tune: fine-tune a previously trained model on the 3 COVID labels
        regression: train a single-output regression head with an MSE criterion
        freeze: freeze the feature blocks up to transition2 when fine-tuning
        adam: use Adam instead of SGD with momentum
        initial_model_path / initial_brixia_model_path: saved models to start from
        weighted_cross_entropy / weighted_cross_entropy_batchwise: dataset-level or
            batch-level pos_weight for BCEWithLogitsLoss
        modification: optional backbone change ('transition_layer',
            'remove_last_block' or 'remove_last_two_block')

    Returns:
        preds: torchvision model predictions on test fold with ground truth for comparison
        aucs: AUCs for each train,test tuple

    """
    NUM_EPOCHS = 100
    BATCH_SIZE = 32

    try:
        rmtree('results/')
    except BaseException:
        pass  # directory doesn't yet exist, no need to clear it
    os.makedirs("results/")

    # use imagenet mean,std for normalization
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]

    N_LABELS = 14  # we are predicting 14 labels
    N_COVID_LABELS = 3  # we are predicting 3 COVID labels

    # define torchvision transforms
    data_transforms = {
        'train': transforms.Compose([
            # transforms.RandomHorizontalFlip(),
            transforms.Resize(224),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean, std)
        ]),
        'val': transforms.Compose([
            transforms.Resize(224),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean, std)
        ]),
    }

    # create train/val dataloaders
    transformed_datasets = {}
    transformed_datasets['train'] = CXR.CXRDataset(
        path_to_images=PATH_TO_IMAGES,
        fold='train',
        transform=data_transforms['train'],
        fine_tune=fine_tune,
        regression=regression)
    transformed_datasets['val'] = CXR.CXRDataset(
        path_to_images=PATH_TO_IMAGES,
        fold='val',
        transform=data_transforms['val'],
        fine_tune=fine_tune,
        regression=regression)

    dataloaders = {}
    dataloaders['train'] = torch.utils.data.DataLoader(
        transformed_datasets['train'],
        batch_size=BATCH_SIZE,
        shuffle=True,
        num_workers=8)
    dataloaders['val'] = torch.utils.data.DataLoader(
        transformed_datasets['val'],
        batch_size=BATCH_SIZE,
        shuffle=True,
        num_workers=8)

    # please do not attempt to train without GPU as will take excessively long
    if not use_gpu:
        raise ValueError("Error, requires GPU")

    if initial_model_path or initial_brixia_model_path:
        if initial_model_path:
            saved_model = torch.load(initial_model_path)
        else:
            saved_model = torch.load(initial_brixia_model_path)
        model = saved_model['model']
        del saved_model
        if fine_tune and not initial_brixia_model_path:
            num_ftrs = model.module.classifier.in_features
            if freeze:
                for feature in model.module.features:
                    for param in feature.parameters():
                        param.requires_grad = False
                    if feature == model.module.features.transition2:
                        break
            if not regression:
                model.module.classifier = nn.Linear(num_ftrs, N_COVID_LABELS)
            else:
                model.module.classifier = nn.Sequential(
                    nn.Linear(num_ftrs, 1),
                    nn.ReLU(inplace=True)
                )
    else:
        model = models.densenet121(pretrained=True)
        num_ftrs = model.classifier.in_features
        model.classifier = nn.Linear(num_ftrs, N_LABELS)

        if modification == 'transition_layer':
            # num_ftrs = model.features.norm5.num_features
            up1 = torch.nn.Sequential(torch.nn.ConvTranspose2d(num_ftrs, num_ftrs, kernel_size=3, stride=2, padding=1),
                                      torch.nn.BatchNorm2d(num_ftrs),
                                      torch.nn.ReLU(True))
            up2 = torch.nn.Sequential(torch.nn.ConvTranspose2d(num_ftrs, num_ftrs, kernel_size=3, stride=2, padding=1),
                                      torch.nn.BatchNorm2d(num_ftrs))

            transition_layer = torch.nn.Sequential(up1, up2)
            model.features.add_module('transition_chestX', transition_layer)

        if modification == 'remove_last_block':
            model.features.denseblock4 = nn.Sequential()
            model.features.transition3 = nn.Sequential()
            # model.features.norm5 = nn.BatchNorm2d(512)
            # model.classifier = nn.Linear(512, N_LABELS)
        if modification == 'remove_last_two_block':
            model.features.denseblock4 = nn.Sequential()
            model.features.transition3 = nn.Sequential()

            model.features.transition2 = nn.Sequential()
            model.features.denseblock3 = nn.Sequential()

            model.features.norm5 = nn.BatchNorm2d(512)
            model.classifier = nn.Linear(512, N_LABELS)

    print(model)

    # put model on GPU
    if not initial_model_path:
        model = nn.DataParallel(model)
    model.to(device)

    if regression:
        criterion = nn.MSELoss()
    else:
        if weighted_cross_entropy:
            pos_weights = transformed_datasets['train'].pos_neg_balance_weights()
            print(pos_weights)
            # pos_weights[pos_weights>40] = 40
            criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weights)
        else:
            criterion = nn.BCEWithLogitsLoss()

    if adam:
        optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=LR, weight_decay=WEIGHT_DECAY)
    else:
        optimizer = optim.SGD(filter(lambda p: p.requires_grad, model.parameters()), lr=LR, weight_decay=WEIGHT_DECAY, momentum=0.9)

    dataset_sizes = {x: len(transformed_datasets[x]) for x in ['train', 'val']}

    # train model
    if regression:
        model, best_epoch = train_model(model, criterion, optimizer, LR, num_epochs=NUM_EPOCHS,
                                        dataloaders=dataloaders, dataset_sizes=dataset_sizes,
                                        weight_decay=WEIGHT_DECAY, fine_tune=fine_tune, regression=regression)
    else:
        model, best_epoch = train_model(model, criterion, optimizer, LR, num_epochs=NUM_EPOCHS,
                                        dataloaders=dataloaders, dataset_sizes=dataset_sizes, weight_decay=WEIGHT_DECAY,
                                        weighted_cross_entropy_batchwise=weighted_cross_entropy_batchwise,
                                        fine_tune=fine_tune)
        # get preds and AUCs on test fold
        preds, aucs = E.make_pred_multilabel(dataloaders['val'], model, save_as_csv=False, fine_tune=fine_tune)
        return preds, aucs
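
When weighted_cross_entropy is set, the code above asks the training dataset for pos_neg_balance_weights(), which is not shown here. A minimal sketch of such a method, assuming the dataset keeps its 0/1 labels in a DataFrame self.df restricted to the columns in self.PRED_LABEL (both names are assumptions) and that device is the module-level torch device used by these examples; the real CXRDataset method may differ.

import numpy as np
import torch

def pos_neg_balance_weights(self):
    # Hypothetical sketch of CXRDataset.pos_neg_balance_weights.
    # self.df and self.PRED_LABEL are assumed attribute names.
    labels = self.df[self.PRED_LABEL].values.astype(np.float32)  # (num_images, num_labels)
    positives = labels.sum(axis=0)
    negatives = labels.shape[0] - positives
    weights = negatives / np.clip(positives, 1, None)   # guard against labels with no positives
    # one pos_weight per label, moved to the device used for training
    return torch.from_numpy(weights).to(device)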
Example #5
TRAIN_DATASET = sys.argv[1]
TEST_DATASET = sys.argv[2]

if (TRAIN_DATASET not in DATASET_NAMES) or (TEST_DATASET not in DATASET_NAMES):
    raise Exception(
        'Invalid dataset name, needs to be one of the following: nih, chexpert, mimic'
    )

# torchvision transforms
data_transforms = get_data_transforms()

PATH_TO_MODEL = '/A/eduardo/projects/cxr_gen/' + TRAIN_DATASET + '/model/modelinf.pt'
PATH_TO_IMAGES = '/A/eduardo/datasets/' + TEST_DATASET

N_LABELS = 14

# load model
model = models.densenet121(pretrained=True)
num_ftrs = model.classifier.in_features
# add final layer with # outputs in same dimension of labels with sigmoid
# activation
model.classifier = nn.Sequential(nn.Linear(num_ftrs, N_LABELS), nn.Sigmoid())

model.load_state_dict(torch.load(PATH_TO_MODEL))
# put model on GPU
model = model.cuda()

preds = E.make_pred_multilabel(data_transforms, model, PATH_TO_IMAGES,
                               TRAIN_DATASET, TEST_DATASET)
E.calc_aucs(preds, TRAIN_DATASET, TEST_DATASET)
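
get_data_transforms() is used above but not shown. A minimal sketch, assuming it returns the same ImageNet-normalized 224-pixel train/val pipeline used by the other examples; the real helper may differ.

from torchvision import transforms

def get_data_transforms():
    # Hypothetical sketch: ImageNet-normalized 224 px train/val transforms.
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    return {
        'train': transforms.Compose([
            transforms.RandomHorizontalFlip(),
            transforms.Resize(224),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean, std),
        ]),
        'val': transforms.Compose([
            transforms.Resize(224),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean, std),
        ]),
    }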
Example #6
def train_model(model,
                criterion,
                optimizer,
                LR,
                num_epochs,
                dataloaders,
                dataset_sizes,
                weight_decay,
                dataset,
                data_transforms,
                PATH_TO_IMAGES,
                PATH_TO_CSV,
                val_on_dataset=False):
    """
    Fine tunes torchvision model to CheXpert data.

    Args:
        model: torchvision model to be finetuned (densenet-121 in this case)
        criterion: loss criterion (binary cross entropy loss, BCELoss)
        optimizer: optimizer to use in training (Adam)
        LR: learning rate
        num_epochs: continue training up to this many epochs
        dataloaders: pytorch train and val dataloaders
        dataset_sizes: length of train and val datasets
        weight_decay: weight decay parameter used when re-creating the Adam optimizer after an LR decay
    Returns:
        model: trained torchvision model
        best_epoch: epoch on which best model val loss was obtained

    """
    since = time.time()

    start_epoch = 1
    best_loss = 999999
    best_val_acc = 0
    best_epoch = -1
    last_train_loss = -1
    last_val_acc = 0

    if val_on_dataset:
        print("WARNING: VALIDATING ON DATASET")
        with open("results/logger", 'a') as logfile:
            logfile.write("WARNING: VALIDATING ON DATASET\n")

    # 25200 s = 7 h, a fixed UTC-7 offset applied to the timestamps below
    print(time.strftime("%d %b %Y %H:%M:%S", time.gmtime(time.time() - 25200)))
    with open("results/logger", 'a') as logfile:
        logfile.write(
            time.strftime("%d %b %Y %H:%M:%S\n",
                          time.gmtime(time.time() - 25200)))

    # iterate over epochs
    for epoch in range(start_epoch, num_epochs + 1):
        print('Epoch {}/{}'.format(epoch, num_epochs))
        print('-' * 10)

        with open("results/logger", 'a') as logfile:
            logfile.write('Epoch {}/{}\n'.format(epoch, num_epochs))
            logfile.write('-' * 10 + '\n')

        running_loss = 0.0
        running_misclass = 0

        i = 0
        total_done = 0

        model.train(True)
        print("Model train: ", model.training)

        # iterate over all data in train/val dataloader:
        for data in dataloaders['train']:
            i += 1
            inputs, labels, _ = data
            #print(labels)
            batch_size = inputs.shape[0]
            inputs = inputs.cuda()  # Variable wrappers are no-ops since torch 0.4
            if str(criterion) == str(nn.BCELoss()):
                labels = labels.cuda().float()
            else:
                labels = labels.cuda().long()
            outputs = model(inputs)

            # calculate gradient and update parameters in train phase
            optimizer.zero_grad()
            loss = criterion(outputs, labels)

            loss.backward()
            optimizer.step()

            running_loss += loss.data.item() * batch_size

            #if phase == 'val' and str(criterion) == str(nn.CrossEntropyLoss()):
            #    print(labels)
            #    idx = torch.argmax(outputs, dim=1)
            #    print(idx)

        epoch_loss = running_loss / dataset_sizes['train']

        print(
            time.strftime("%d %b %Y %H:%M:%S",
                          time.gmtime(time.time() - 25200)))
        with open("results/logger", 'a') as logfile:
            logfile.write(
                time.strftime("%d %b %Y %H:%M:%S\n",
                              time.gmtime(time.time() - 25200)))

        print('train epoch {}: loss {:.4f} with data size {}'.format(
            epoch, epoch_loss, dataset_sizes['train']))
        with open("results/logger", 'a') as logfile:
            logfile.write(
                'train epoch {}: loss {:.4f} with data size {}\n'.format(
                    epoch, epoch_loss, dataset_sizes['train']))

        time_elapsed = time.time() - since
        print('train epoch complete in {:.0f}m {:.0f}s'.format(
            time_elapsed // 60, time_elapsed % 60))
        with open("results/logger", 'a') as logfile:
            logfile.write('train epoch complete in {:.0f}m {:.0f}s\n'.format(
                time_elapsed // 60, time_elapsed % 60))

        last_train_loss = epoch_loss

        # keep track of best train loss
        if epoch_loss < best_loss:
            best_loss = epoch_loss

        # done with training

        if str(criterion) == 'BCELoss()':
            if val_on_dataset:
                _, metric = E.make_pred_multilabel(data_transforms,
                                                   model,
                                                   PATH_TO_IMAGES,
                                                   PATH_TO_CSV,
                                                   'auc',
                                                   dataset=dataset)
            else:
                _, metric = E.make_pred_multilabel(data_transforms, model,
                                                   PATH_TO_IMAGES, PATH_TO_CSV,
                                                   'auc')
        else:
            _, metric = E.make_pred_multilabel(data_transforms,
                                               model,
                                               PATH_TO_IMAGES,
                                               PATH_TO_CSV,
                                               'auc',
                                               dataset=dataset,
                                               multiclass=True)

        auc = metric[metric.columns[1:]].to_numpy(dtype=float)  # DataFrame.as_matrix was removed in pandas 1.0
        last_val_acc = auc[~np.isnan(auc)].mean()

        print(metric)
        with open("results/logger", 'a') as logfile:
            print(metric, file=logfile)

        print('mean epoch validation accuracy:', last_val_acc)
        with open("results/logger", 'a') as logfile:
            logfile.write('mean epoch validation accuracy: ' +
                          str(last_val_acc) + '\n')

        # decay learning rate if no val accuracy improvement in this epoch
        if last_val_acc < best_val_acc:
            print("Running with LR decay on val accuracy")
            with open("results/logger", 'a') as logfile:
                logfile.write("Running with LR decay on val accuracy\n")
            print("decay loss from " + str(LR) + " to " + str(LR / 10) +
                  " as not seeing improvement in val accuracy")
            with open("results/logger", 'a') as logfile:
                logfile.write("decay loss from " + str(LR) + " to " +
                              str(LR / 10) +
                              " as not seeing improvement in val accuracy\n")
            LR = LR / 10
            optimizer = optim.Adam(filter(lambda p: p.requires_grad,
                                          model.parameters()),
                                   lr=LR,
                                   betas=(0.9, 0.999),
                                   eps=1e-08,
                                   weight_decay=weight_decay)

            print("created new optimizer with LR " + str(LR))
            with open("results/logger", 'a') as logfile:
                logfile.write("created new optimizer with LR " + str(LR) +
                              '\n')

        # track best val accuracy yet
        if last_val_acc > best_val_acc:
            best_val_acc = last_val_acc
            best_epoch = epoch

        print('saving checkpoint_' + str(epoch))
        with open("results/logger", 'a') as logfile:
            logfile.write('saving checkpoint_' + str(epoch) + '\n')
        checkpoint(model, last_train_loss, last_val_acc, metric, epoch,
                   best_epoch, LR, weight_decay)

        # log training loss over each epoch
        with open("results/log_train", 'a') as logfile:
            logwriter = csv.writer(logfile, delimiter=',')
            if (epoch == 1):
                logwriter.writerow(["epoch", "train_loss", "average auc"])
            logwriter.writerow([epoch, last_train_loss, last_val_acc])

        print("best epoch: ", best_epoch)
        with open("results/logger", 'a') as logfile:
            logfile.write("best epoch: " + str(best_epoch) + '\n')

        print("best train loss: ", best_loss)
        with open("results/logger", 'a') as logfile:
            logfile.write("best train loss: " + str(best_loss) + '\n')

        print("best val accuracy: ", best_val_acc)
        with open("results/logger", 'a') as logfile:
            logfile.write("best val accuracy: " + str(best_val_acc) + '\n')

        total_done += batch_size
        if (total_done % (100 * batch_size) == 0):
            print("completed " + str(total_done) + " so far in epoch")
            with open("results/logger", 'a') as logfile:
                logfile.write("completed " + str(total_done) +
                              " so far in epoch\n")

        # break if no val loss improvement in 3 epochs
        if ((epoch - best_epoch) >= 3):
            print("no improvement in 3 epochs, break")
            with open("results/logger", 'a') as logfile:
                logfile.write("no improvement in 3 epochs, break\n")
            break

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    with open("results/logger", 'a') as logfile:
        logfile.write('Training complete in {:.0f}m {:.0f}s\n'.format(
            time_elapsed // 60, time_elapsed % 60))

    # load best model weights to return
    checkpoint_best = torch.load('results/checkpoint_' + str(best_epoch))
    model = checkpoint_best['model']

    return model, best_epoch
Example #7
def run(PATH_TO_IMAGES, LR, WEIGHT_DECAY, opt):
    """
    Train torchvision model to NIH data given high level hyperparameters.

    Args:
        PATH_TO_IMAGES: path to NIH images
        LR: learning rate
        WEIGHT_DECAY: weight decay parameter for SGD
        opt: parsed options (batch size, input size, run_path, wandb project/run name, etc.)

    Returns:
        preds: torchvision model predictions on test fold with ground truth for comparison
        aucs: AUCs for each train,test tuple

    """

    use_gpu = torch.cuda.is_available()
    gpu_count = torch.cuda.device_count()
    print("Available GPU count:" + str(gpu_count))

    wandb.init(project=opt.project, name=opt.run_name)
    wandb.config.update(opt, allow_val_change=True)

    NUM_EPOCHS = 60
    BATCH_SIZE = opt.batch_size

    if opt.eval_only:
        # test only. it is okay to have duplicate run_path
        os.makedirs(opt.run_path, exist_ok=True)
    else:
        # train from scratch, should not have the same run_path. Otherwise it will overwrite previous runs.
        try:
            os.makedirs(opt.run_path)
        except FileExistsError:
            print("[ERROR] run_path {} exists. try to assign a unique run_path".format(opt.run_path))
            return None, None
        except Exception as e:
            print("exception while creating run_path {}".format(opt.run_path))
            print(str(e))
            return None, None

    # use imagenet mean,std for normalization
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]

    N_LABELS = 14  # we are predicting 14 labels

    # define torchvision transforms
    if opt.random_crop:

        data_transforms = {
            'train': transforms.Compose([
                transforms.RandomHorizontalFlip(),
                transforms.RandomResizedCrop(size=opt.input_size, scale=(0.8, 1.0)),  # crop then resize
                transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
                transforms.ToTensor(),
                transforms.Normalize(mean, std)
            ]),
            'val': transforms.Compose([
                transforms.Resize(int(opt.input_size * 1.05)),
                transforms.CenterCrop(opt.input_size),
                transforms.ToTensor(),
                transforms.Normalize(mean, std)
            ]),
        }

    else:
        data_transforms = {
            'train': transforms.Compose([
                transforms.RandomHorizontalFlip(),
                transforms.Resize(opt.input_size),
                transforms.ToTensor(),
                transforms.Normalize(mean, std)
            ]),
            'val': transforms.Compose([
                transforms.Resize(opt.input_size),
                transforms.ToTensor(),
                transforms.Normalize(mean, std)
            ]),
        }
    # create train/val dataloaders
    transformed_datasets = {}
    transformed_datasets['train'] = CXR.CXRDataset(
        path_to_images=PATH_TO_IMAGES,
        fold='train',
        transform=data_transforms['train'])
    transformed_datasets['val'] = CXR.CXRDataset(
        path_to_images=PATH_TO_IMAGES,
        fold='val',
        transform=data_transforms['val'])

    worker_init_fn = set_seed(opt)

    dataloaders = {}
    dataloaders['train'] = torch.utils.data.DataLoader(
        transformed_datasets['train'],
        batch_size=BATCH_SIZE,
        shuffle=True,
        num_workers=30,
        drop_last=True,
        worker_init_fn=worker_init_fn
    )
    dataloaders['val'] = torch.utils.data.DataLoader(
        transformed_datasets['val'],
        batch_size=BATCH_SIZE,
        shuffle=False,
        num_workers=30,
        drop_last=True,
        worker_init_fn=worker_init_fn
    )

    # please do not attempt to train without GPU as will take excessively long
    if not use_gpu:
        raise ValueError("Error, requires GPU")

    # load model
    model = load_model(N_LABELS, opt)

    # define criterion, optimizer for training
    criterion = nn.BCELoss()

    optimizer = create_optimizer(model, LR, WEIGHT_DECAY, opt)

    scheduler = lr_scheduler.ReduceLROnPlateau(
        optimizer,
        'max',
        factor=opt.lr_decay_ratio,
        patience=opt.patience,
        verbose=True
    )

    dataset_sizes = {x: len(transformed_datasets[x]) for x in ['train', 'val']}

    if opt.eval_only:
        print("loading best model statedict")
        # load best model weights to return
        checkpoint_best = torch.load(os.path.join(opt.run_path, 'checkpoint'))
        model = load_model(N_LABELS, opt=opt)
        model.load_state_dict(checkpoint_best['state_dict'])

    else:
        # train model
        model, best_epoch = train_model(
            model,
            criterion,
            optimizer,
            LR,
            scheduler=scheduler,
            num_epochs=NUM_EPOCHS,
            dataloaders=dataloaders,
            dataset_sizes=dataset_sizes,
            PATH_TO_IMAGES=PATH_TO_IMAGES,
            data_transforms=data_transforms,
            opt=opt,
        )

    # get preds and AUCs on test fold
    preds, aucs = E.make_pred_multilabel(
        data_transforms,
        model,
        PATH_TO_IMAGES,
        fold="test",
        opt=opt,
    )

    wandb.log({
        'val_official': np.average(list(aucs.auc))
    })

    return preds, aucs
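
set_seed(opt) above returns the worker_init_fn passed to the dataloaders, but the helper is not shown. A minimal sketch, assuming opt carries a seed attribute (an assumption, not visible in this example); the real implementation may differ.

import random
import numpy as np
import torch

def set_seed(opt):
    # Hypothetical sketch: seed every RNG and hand back a DataLoader worker_init_fn.
    seed = getattr(opt, 'seed', 0)   # opt.seed is assumed
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    def worker_init_fn(worker_id):
        # give each DataLoader worker its own deterministic seed
        np.random.seed(seed + worker_id)
        random.seed(seed + worker_id)

    return worker_init_fn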
Example #8
def train_model(
        model,
        criterion,
        optimizer,
        LR,
        scheduler,
        num_epochs,
        dataloaders,
        dataset_sizes,
        PATH_TO_IMAGES,
        data_transforms,
        opt,
):
    """
    Fine tunes torchvision model to NIH CXR data.

    Args:
        model: torchvision model to be finetuned (densenet-121 in this case)
        criterion: loss criterion (binary cross entropy loss, BCELoss)
        optimizer: optimizer to use in training (SGD)
        LR: learning rate
        num_epochs: continue training up to this many epochs
        dataloaders: pytorch train and val dataloaders
        dataset_sizes: length of train and val datasets
        scheduler: ReduceLROnPlateau scheduler stepped on validation AUC
        opt: parsed options (gpu ids, run_path, orth_loss_lambda, LR-decay settings, etc.)
    Returns:
        model: trained torchvision model
        best_epoch: epoch on which best model val loss was obtained

    """
    since = time.time()

    start_epoch = 1
    best_auc = -1
    best_epoch = -1
    last_train_loss = -1

    # iterate over epochs
    for epoch in range(start_epoch, num_epochs + 1):
        print('Epoch {}/{}(max)'.format(epoch, num_epochs))
        print('-' * 10)

        # set model to train or eval mode based on whether we are in train or val
        # necessary to get correct predictions given batchnorm
        for phase in ['train', 'val']:
            print('Epoch %03d, ' % epoch, phase)
            if phase == 'train':
                model.train(True)
            else:
                model.train(False)

            running_loss = 0.0

            i = 0
            total_done = 0
            # iterate over all data in train/val dataloader:
            data_length = len(dataloaders[phase])

            for data_idx, data in enumerate(dataloaders[phase]):
                inputs, labels, _ = data
                batch_size = inputs.shape[0]

                if phase == 'val':
                    with torch.no_grad():
                        inputs = inputs.cuda(opt.gpu_ids[0])
                        labels = labels.cuda(opt.gpu_ids[0]).float()
                        outputs = model(inputs)
                        if isinstance(outputs, tuple):
                            # has dot product
                            outputs, dp = outputs
                        else:
                            dp = None

                        # no parameter update in the val phase; the loss is computed only for logging
                        optimizer.zero_grad()

                        loss = criterion(outputs, labels)
                else:
                    inputs = inputs.cuda(opt.gpu_ids[0])
                    labels = labels.cuda(opt.gpu_ids[0]).float()
                    outputs = model(inputs)

                    if isinstance(outputs, tuple):
                        # has dot product
                        outputs, dp = outputs
                    else:
                        dp = None

                    # calculate gradient and update parameters in train phase
                    optimizer.zero_grad()

                    loss = criterion(outputs, labels)

                    if dp is not None:
                        dp_loss = opt.orth_loss_lambda * torch.abs(dp.mean())
                        loss = loss + dp_loss

                if phase == 'train':
                    loss.backward()
                    optimizer.step()

                    if data_idx % 20 == 0:
                        wandb.log({
                            'epoch': epoch + data_idx / float(len(dataloaders[phase])),
                            'loss': loss.cpu(),
                            'lr': list(optimizer.param_groups)[0]['lr']
                        })

                if data_idx == 0:
                    log_images = []
                    for image in list(inputs[:10].cpu()):
                        log_images.append(wandb.Image(
                            np.transpose(image.numpy(), (1, 2, 0)),
                            caption='{}_image'.format(phase)
                        ))

                    wandb.log({'{}_image'.format(phase): log_images})

                running_loss += loss.data.item() * batch_size

                if data_idx % 100 == 0:
                    print("{} / {} ".format(data_idx, data_length), end="\r", flush=True)

            epoch_loss = running_loss / dataset_sizes[phase]

            if phase == 'train':
                last_train_loss = epoch_loss

            print(phase + ' epoch {}: loss {:.4f} with data size {}'.format(
                epoch, epoch_loss, dataset_sizes[phase]))

            # decay learning rate if no val loss improvement in this epoch
            if phase == 'val':
                pred, auc = E.make_pred_multilabel(
                    data_transforms,
                    model,
                    PATH_TO_IMAGES,
                    fold="val",
                    opt=opt,
                )
                wandb.log({
                    'epoch': epoch + 1,
                    'performance': np.average(list(auc.auc))
                })

                epoch_auc = np.average(list(auc.auc))
                scheduler.step(epoch_auc)

            # checkpoint model
            if phase == 'val' and epoch_auc > best_auc:
                # best_loss = epoch_loss
                best_auc = epoch_auc
                best_epoch = epoch

                checkpoint(model, best_auc, epoch, LR, opt)

            # log training and validation loss over each epoch
            if phase == 'val':
                with open(os.path.join(opt.run_path, "log_train"), 'a') as logfile:
                    logwriter = csv.writer(logfile, delimiter=',')
                    if (epoch == 1):
                        logwriter.writerow(["epoch", "train_loss", "val_loss"])
                    logwriter.writerow([epoch, last_train_loss, epoch_loss])

        total_done += batch_size
        if (total_done % (100 * batch_size) == 0):
            print("completed " + str(total_done) + " so far in epoch")

        # stop once the scheduler has decayed the LR opt.num_lr_drops times
        if np.round(list(optimizer.param_groups)[0]['lr'], 5) <= np.round(
                LR * (opt.lr_decay_ratio ** opt.num_lr_drops), 5):
            break

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))

    # load best model weights to return
    checkpoint_best = torch.load(os.path.join(opt.run_path, 'checkpoint'))
    model = load_model(N_LABELS=14, opt=opt)
    model.load_state_dict(checkpoint_best['state_dict'])

    return model, best_epoch
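
load_model(N_LABELS, opt) is called here and in Example #7 but defined elsewhere. A minimal sketch, assuming a DenseNet-121 backbone with an N_LABELS-way sigmoid head (which is what Example #7's nn.BCELoss criterion implies) and that opt.gpu_ids[0] is the target GPU; the real helper may instead build the variant whose forward also returns the dot-product term handled in the loop above.

import torch.nn as nn
from torchvision import models

def load_model(N_LABELS, opt):
    # Hypothetical sketch of the load_model helper used in Examples #7 and #8.
    model = models.densenet121(pretrained=True)
    num_ftrs = model.classifier.in_features
    model.classifier = nn.Sequential(nn.Linear(num_ftrs, N_LABELS), nn.Sigmoid())
    return model.cuda(opt.gpu_ids[0])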
Example #9
def train_cnn(PATH_TO_IMAGES, LR, WEIGHT_DECAY):
    """
    Train torchvision model to NIH data given high level hyperparameters.

    Args:
        PATH_TO_IMAGES: path to NIH images
        LR: learning rate
        WEIGHT_DECAY: weight decay parameter for SGD

    Returns:
        preds: torchvision model predictions on test fold with ground truth for comparison
        aucs: AUCs for each train,test tuple

    """
    NUM_EPOCHS = 100
    BATCH_SIZE = 16

    try:
        rmtree('results/')
    except BaseException:
        pass  # directory doesn't yet exist, no need to clear it
    os.makedirs("results/")

    # use imagenet mean,std for normalization
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]

    N_LABELS = 14  # we are predicting 14 labels

    # load labels
    df = pd.read_csv("nih_labels.csv", index_col=0)

    # define torchvision transforms
    data_transforms = {
        'train': transforms.Compose([
            transforms.RandomHorizontalFlip(),
            transforms.Resize(224),  # was transforms.Scale, removed in newer torchvision
            # because resize doesn't always give 224 x 224, the CenterCrop below ensures
            # 224 x 224
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean, std)
        ]),
        'val': transforms.Compose([
            transforms.Resize(224),  # was transforms.Scale, removed in newer torchvision
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean, std)
        ]),
    }

    # create train/val dataloaders
    transformed_datasets = {}
    transformed_datasets['train'] = CXR.CXRDataset(
        path_to_images=PATH_TO_IMAGES,
        fold='train',
        transform=data_transforms['train'])
    transformed_datasets['val'] = CXR.CXRDataset(
        path_to_images=PATH_TO_IMAGES,
        fold='val',
        transform=data_transforms['val'])

    dataloaders = {}
    dataloaders['train'] = torch.utils.data.DataLoader(
        transformed_datasets['train'],
        batch_size=BATCH_SIZE,
        shuffle=True,
        num_workers=8)
    dataloaders['val'] = torch.utils.data.DataLoader(
        transformed_datasets['val'],
        batch_size=BATCH_SIZE,
        shuffle=True,
        num_workers=8)

    # please do not attempt to train without GPU as will take excessively long
    if not use_gpu:
        raise ValueError("Error, requires GPU")
    model = models.densenet121(pretrained=True)
    num_ftrs = model.classifier.in_features
    # add final layer with # outputs in same dimension of labels with sigmoid
    # activation
    model.classifier = nn.Sequential(
        nn.Linear(num_ftrs, N_LABELS), nn.Sigmoid())

    # put model on GPU
    model = model.cuda()

    # define criterion, optimizer for training
    criterion = nn.BCELoss()
    optimizer = optim.SGD(
        filter(
            lambda p: p.requires_grad,
            model.parameters()),
        lr=LR,
        momentum=0.9,
        weight_decay=WEIGHT_DECAY)
    dataset_sizes = {x: len(transformed_datasets[x]) for x in ['train', 'val']}

    # train model
    model, best_epoch = train_model(model, criterion, optimizer, LR, num_epochs=NUM_EPOCHS,
                                    dataloaders=dataloaders, dataset_sizes=dataset_sizes, weight_decay=WEIGHT_DECAY)

    # get preds and AUCs on test fold
    preds, aucs = E.make_pred_multilabel(
        data_transforms, model, PATH_TO_IMAGES)

    return preds, aucs