Example #1
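A compact evaluation entry point: it loads a pickled model checkpoint, rebuilds the augmentation pipeline from the aug_params stored on the model, scores a CSV-defined dataset with test-time augmentation, and optionally prints or writes per-image predictions (the output columns match the seven ISIC 2018 lesion classes).
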
def main(args):
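    # Select the GPU when available; the checkpoint is a full pickled
    # model produced by torch.save(model, ...).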
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = torch.load(args.model, map_location=device)
    model.eval()
    model.to(device)

    print(model.aug_params)
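    # Rebuild the training-time augmentation pipeline from the parameters
    # stored on the model.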
    augs = Augmentations(**model.aug_params)
    dataset = CSVDatasetWithName(args.dataset_root,
                                 args.dataset_csv,
                                 'image',
                                 'label',
                                 transform=augs.tf_transform,
                                 add_extension='.jpg',
                                 split=args.dataset_split)

    score, preds = test_with_augmentation(model, dataset, device, 8, args.n)
    print(score)

    if args.print_predictions:
        for _, row in preds.iterrows():
            print("{},{}".format(row['image'], row['score']))

    if args.output:
        preds.to_csv(args.output,
                     index=False,
                     columns=[
                         'image', 'MEL', 'NV', 'BCC', 'AKIEC', 'BKL', 'DF',
                         'VASC'
                     ])
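
For reference, here is a minimal sketch of the command-line parser this entry point seems to expect. The flag names and defaults are inferred from the args.* attributes used above; they are assumptions, not taken from the original repository:

import argparse

def parse_args():
    # Hypothetical CLI reconstructed from the attributes accessed on args.
    parser = argparse.ArgumentParser(
        description='Evaluate a saved model with test-time augmentation.')
    parser.add_argument('model',
                        help='checkpoint saved with torch.save(model, ...)')
    parser.add_argument('dataset_root',
                        help='root directory containing the images')
    parser.add_argument('dataset_csv',
                        help='CSV file listing images and labels')
    parser.add_argument('--dataset-split', default=None,
                        help='optional split name passed to the dataset')
    parser.add_argument('-n', type=int, default=32,
                        help='augmented copies per image (placeholder default)')
    parser.add_argument('--print-predictions', action='store_true')
    parser.add_argument('--output', default=None,
                        help='optional CSV path for per-class predictions')
    return parser.parse_args()

# Usage: main(parse_args())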
Example #2
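A full training entry point, apparently a Sacred experiment given the _run and fs_observer references: it fine-tunes one of four ImageNet-pretrained backbones for binary melanoma classification, early-stops on validation AUC, and evaluates the best checkpoint on the test set with test-time augmentation, without augmentation, and with an inception-style crop.
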
def main(train_root, train_csv, val_root, val_csv, test_root, test_csv, epochs,
         aug, model_name, batch_size, num_workers, val_samples, test_samples,
         early_stopping_patience, limit_data, images_per_epoch, _run):
    assert model_name in ('inceptionv4', 'resnet152', 'densenet161',
                          'senet154')

    AUGMENTED_IMAGES_DIR = os.path.join(fs_observer.dir, 'images')
    CHECKPOINTS_DIR = os.path.join(fs_observer.dir, 'checkpoints')
    BEST_MODEL_PATH = os.path.join(CHECKPOINTS_DIR, 'model_best.pth')
    LAST_MODEL_PATH = os.path.join(CHECKPOINTS_DIR, 'model_last.pth')
    RESULTS_CSV_PATH = os.path.join('results', 'results.csv')
    EXP_NAME = _run.meta_info['options']['--name']
    EXP_ID = _run._id

    for directory in (AUGMENTED_IMAGES_DIR, CHECKPOINTS_DIR):
        os.makedirs(directory)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

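    # Each branch swaps the ImageNet classifier for a 2-class head and records
    # the input size and normalization stats that backbone expects.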
    if model_name == 'inceptionv4':
        model = ptm.inceptionv4(num_classes=1000, pretrained='imagenet')
        model.last_linear = nn.Linear(model.last_linear.in_features, 2)
        aug['size'] = 299
        aug['mean'] = model.mean
        aug['std'] = model.std
    elif model_name == 'resnet152':
        model = models.resnet152(pretrained=True)
        model.fc = nn.Linear(model.fc.in_features, 2)
        aug['size'] = 224
        aug['mean'] = [0.485, 0.456, 0.406]
        aug['std'] = [0.229, 0.224, 0.225]
    elif model_name == 'densenet161':
        model = models.densenet161(pretrained=True)
        model.classifier = nn.Linear(model.classifier.in_features, 2)
        aug['size'] = 224
        aug['mean'] = [0.485, 0.456, 0.406]
        aug['std'] = [0.229, 0.224, 0.225]
    elif model_name == 'senet154':
        model = ptm.senet154(num_classes=1000, pretrained='imagenet')
        model.last_linear = nn.Linear(model.last_linear.in_features, 2)
        aug['size'] = model.input_size[1]
        aug['mean'] = model.mean
        aug['std'] = model.std
    model.to(device)

    augs = Augmentations(**aug)
    model.aug_params = aug
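    # (Evaluation code such as Example #1 reads aug_params back from the
    # checkpoint to rebuild this exact pipeline.)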

    datasets = {
        'samples':
        CSVDataset(train_root,
                   train_csv,
                   'image_id',
                   'melanoma',
                   transform=augs.tf_augment,
                   add_extension='.jpg',
                   limit=(400, 433)),
        'train':
        CSVDataset(train_root,
                   train_csv,
                   'image_id',
                   'melanoma',
                   transform=augs.tf_transform,
                   add_extension='.jpg',
                   random_subset_size=limit_data),
        'val':
        CSVDatasetWithName(val_root,
                           val_csv,
                           'image_id',
                           'melanoma',
                           transform=augs.tf_transform,
                           add_extension='.jpg'),
        'test':
        CSVDatasetWithName(test_root,
                           test_csv,
                           'image_id',
                           'melanoma',
                           transform=augs.tf_transform,
                           add_extension='.jpg'),
        'test_no_aug':
        CSVDatasetWithName(test_root,
                           test_csv,
                           'image_id',
                           'melanoma',
                           transform=augs.no_augmentation,
                           add_extension='.jpg'),
        'test_144':
        CSVDatasetWithName(test_root,
                           test_csv,
                           'image_id',
                           'melanoma',
                           transform=augs.inception_crop,
                           add_extension='.jpg'),
    }

    dataloaders = {
        'train':
        DataLoader(datasets['train'],
                   batch_size=batch_size,
                   shuffle=True,
                   num_workers=num_workers,
                   worker_init_fn=set_seeds),
        'samples':
        DataLoader(datasets['samples'],
                   batch_size=batch_size,
                   shuffle=False,
                   num_workers=num_workers,
                   worker_init_fn=set_seeds),
    }

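    # Save a few augmented samples plus a grid image for visual sanity checking.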
    save_images(datasets['samples'], to=AUGMENTED_IMAGES_DIR, n=32)
    sample_batch, _ = next(iter(dataloaders['samples']))
    save_image(make_grid(sample_batch, padding=0),
               os.path.join(AUGMENTED_IMAGES_DIR, 'grid.jpg'))

    criterion = nn.CrossEntropyLoss()

    optimizer = optim.SGD(model.parameters(),
                          lr=0.001,
                          momentum=0.9,
                          weight_decay=0.001)

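    # Fixed schedule: cut the learning rate 10x at epoch 10.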
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
                                               milestones=[10],
                                               gamma=0.1)
    metrics = {
        'train': pd.DataFrame(columns=['epoch', 'loss', 'acc', 'auc']),
        'val': pd.DataFrame(columns=['epoch', 'loss', 'acc', 'auc'])
    }

    best_val_auc = 0.0
    best_epoch = 0
    epochs_without_improvement = 0
    if images_per_epoch:
        batches_per_epoch = images_per_epoch // batch_size
    else:
        batches_per_epoch = None

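    # Train and validate each epoch, checkpoint on best validation AUC, and
    # stop early once it stops improving.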
    for epoch in range(epochs):
        print('train epoch {}/{}'.format(epoch + 1, epochs))
        epoch_train_result = train_epoch(device, model, dataloaders, criterion,
                                         optimizer, batches_per_epoch)

        # DataFrame.append was removed in pandas 2.0; concat a one-row frame.
        metrics['train'] = pd.concat(
            [metrics['train'],
             pd.DataFrame([{**epoch_train_result, 'epoch': epoch}])],
            ignore_index=True)
        print('train', epoch_train_result)

        epoch_val_result, _ = test_with_augmentation(model, datasets['val'],
                                                     device, num_workers,
                                                     val_samples)

        metrics['val'] = pd.concat(
            [metrics['val'],
             pd.DataFrame([{**epoch_val_result, 'epoch': epoch}])],
            ignore_index=True)
        print('val', epoch_val_result)
        print('-' * 40)

        scheduler.step()

        if epoch_val_result['auc'] > best_val_auc:
            best_val_auc = epoch_val_result['auc']
            best_epoch = epoch
            epochs_without_improvement = 0
            torch.save(model, BEST_MODEL_PATH)
        else:
            epochs_without_improvement += 1

        if epochs_without_improvement > early_stopping_patience:
            torch.save(model, LAST_MODEL_PATH)
            break

        if epoch == (epochs - 1):
            torch.save(model, LAST_MODEL_PATH)

    for phase in ['train', 'val']:
        metrics[phase].epoch = metrics[phase].epoch.astype(int)
        metrics[phase].to_csv(os.path.join(fs_observer.dir, phase + '.csv'),
                              index=False)

    # Run testing
    test_result, _ = test_with_augmentation(torch.load(BEST_MODEL_PATH),
                                            datasets['test'], device,
                                            num_workers, test_samples)
    print('test', test_result)

    test_noaug_result, _ = test_with_augmentation(torch.load(BEST_MODEL_PATH),
                                                  datasets['test_no_aug'],
                                                  device, num_workers, 1)
    print('test (no augmentation)', test_noaug_result)

    test_144crop_result, _ = test_with_augmentation(
        torch.load(BEST_MODEL_PATH), datasets['test_144'], device, num_workers,
        1)
    print('test (144-crop)', test_144crop_result)

    with open(RESULTS_CSV_PATH, 'a') as file:
        file.write(','.join((EXP_NAME, str(EXP_ID), str(best_epoch),
                             str(best_val_auc), str(test_noaug_result['auc']),
                             str(test_result['auc']),
                             str(test_144crop_result['auc']))) + '\n')

    return (test_noaug_result['auc'], test_result['auc'],
            test_144crop_result['auc'])
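Example #3
An extended variant of Example #2: it supports a much wider set of backbones, disables OpenCV threading for DataLoader safety, switches to a ReduceLROnPlateau schedule driven by validation loss, evaluates both the best and the last checkpoint (with and without test-time augmentation), saves per-image predictions, and appends a wider metrics row to results.csv.
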
def main(train_root, train_csv, val_root, val_csv, test_root, test_csv,
         epochs, aug, model_name, batch_size, num_workers, val_samples,
         test_samples, early_stopping_patience, limit_data, images_per_epoch,
         split_id, _run):
    assert model_name in ('inceptionv4', 'resnet152', 'densenet161',
                          'senet154', 'pnasnet5large', 'nasnetalarge',
                          'xception', 'squeezenet', 'resnext', 'dpn',
                          'inceptionresnetv2', 'mobilenetv2')

    cv2.setNumThreads(0)
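    # OpenCV's internal threading can contend with DataLoader worker
    # processes, so disable it.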

    AUGMENTED_IMAGES_DIR = os.path.join(fs_observer.dir, 'images')
    CHECKPOINTS_DIR = os.path.join(fs_observer.dir, 'checkpoints')
    BEST_MODEL_PATH = os.path.join(CHECKPOINTS_DIR, 'model_best.pth')
    LAST_MODEL_PATH = os.path.join(CHECKPOINTS_DIR, 'model_last.pth')
    RESULTS_CSV_PATH = os.path.join('results', 'results.csv')
    EXP_NAME = _run.meta_info['options']['--name']
    EXP_ID = _run._id

    for directory in (AUGMENTED_IMAGES_DIR, CHECKPOINTS_DIR):
        os.makedirs(directory)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    if model_name == 'inceptionv4':
        model = ptm.inceptionv4(num_classes=1000, pretrained='imagenet')
        model.last_linear = nn.Linear(model.last_linear.in_features, 2)
        aug['size'] = 299
        aug['mean'] = model.mean
        aug['std'] = model.std
    elif model_name == 'resnet152':
        model = models.resnet152(pretrained=True)
        model.fc = nn.Linear(model.fc.in_features, 2)
        aug['size'] = 224
        aug['mean'] = [0.485, 0.456, 0.406]
        aug['std'] = [0.229, 0.224, 0.225]
    elif model_name == 'densenet161':
        model = models.densenet161(pretrained=True)
        model.classifier = nn.Linear(model.classifier.in_features, 2)
        aug['size'] = 224
        aug['mean'] = [0.485, 0.456, 0.406]
        aug['std'] = [0.229, 0.224, 0.225]
    elif model_name == 'senet154':
        model = ptm.senet154(num_classes=1000, pretrained='imagenet')
        model.last_linear = nn.Linear(model.last_linear.in_features, 2)
        aug['size'] = model.input_size[1]
        aug['mean'] = model.mean
        aug['std'] = model.std
    elif model_name == 'squeezenet':
        model = ptm.squeezenet1_1(num_classes=1000, pretrained='imagenet')
        model.last_conv = nn.Conv2d(
            512, 2, kernel_size=(1, 1), stride=(1, 1))
        aug['size'] = model.input_size[1]
        aug['mean'] = model.mean
        aug['std'] = model.std
    elif model_name == 'pnasnet5large':
        model = ptm.pnasnet5large(num_classes=1000, pretrained='imagenet')
        model.last_linear = nn.Linear(model.last_linear.in_features, 2)
        aug['size'] = model.input_size[1]
        aug['mean'] = model.mean
        aug['std'] = model.std
    elif model_name == 'nasnetalarge':
        model = ptm.nasnetalarge(num_classes=1000, pretrained='imagenet')
        model.last_linear = nn.Linear(model.last_linear.in_features, 2)
        aug['size'] = model.input_size[1]
        aug['mean'] = model.mean
        aug['std'] = model.std
    elif model_name == 'xception':
        model = ptm.xception(num_classes=1000, pretrained='imagenet')
        model.last_linear = nn.Linear(model.last_linear.in_features, 2)
        aug['size'] = model.input_size[1]
        aug['mean'] = model.mean
        aug['std'] = model.std
    elif model_name == 'dpn':
        model = ptm.dpn131(num_classes=1000, pretrained='imagenet')
        model.last_linear = nn.Conv2d(model.last_linear.in_channels, 2,
                                      kernel_size=1, bias=True)
        aug['size'] = model.input_size[1]
        aug['mean'] = model.mean
        aug['std'] = model.std
    elif model_name == 'resnext':
        model = ptm.resnext101_64x4d(num_classes=1000, pretrained='imagenet')
        model.last_linear = nn.Linear(model.last_linear.in_features, 2)
        aug['size'] = model.input_size[1]
        aug['mean'] = model.mean
        aug['std'] = model.std
    elif model_name == 'inceptionresnetv2':
        model = ptm.inceptionresnetv2(num_classes=1000, pretrained='imagenet')
        model.last_linear = nn.Linear(model.last_linear.in_features, 2)
        aug['size'] = model.input_size[1]
        aug['mean'] = model.mean
        aug['std'] = model.std
    elif model_name == 'mobilenetv2':
        model = MobileNetV2()
        model.load_state_dict(torch.load('./auglib/models/mobilenet_v2.pth'))
        model.classifier = nn.Sequential(
            nn.Dropout(0.2),
            nn.Linear(model.last_channel, 2),
        )
        aug['size'] = 224
        aug['mean'] = [0.485, 0.456, 0.406]
        aug['std'] = [0.229, 0.224, 0.225]
    model.to(device)

    augs = Augmentations(**aug)
    model.aug_params = aug

    datasets = {
        'samples': CSVDataset(train_root, train_csv, 'image_id', 'melanoma',
                              transform=augs.tf_augment, add_extension='.jpg',
                              limit=(400, 433)),
        'train': CSVDataset(train_root, train_csv, 'image_id', 'melanoma',
                            transform=augs.tf_transform, add_extension='.jpg',
                            random_subset_size=limit_data),
        'val': CSVDatasetWithName(
            val_root, val_csv, 'image_id', 'melanoma',
            transform=augs.tf_transform, add_extension='.jpg'),
        'test': CSVDatasetWithName(
            test_root, test_csv, 'image_id', 'melanoma',
            transform=augs.tf_transform, add_extension='.jpg'),
        'test_no_aug': CSVDatasetWithName(
            test_root, test_csv, 'image_id', 'melanoma',
            transform=augs.no_augmentation, add_extension='.jpg'),
        'test_144': CSVDatasetWithName(
            test_root, test_csv, 'image_id', 'melanoma',
            transform=augs.inception_crop, add_extension='.jpg'),
    }

    dataloaders = {
        'train': DataLoader(datasets['train'], batch_size=batch_size,
                            shuffle=True, num_workers=num_workers,
                            worker_init_fn=set_seeds),
        'samples': DataLoader(datasets['samples'], batch_size=batch_size,
                              shuffle=False, num_workers=num_workers,
                              worker_init_fn=set_seeds),
    }

    save_images(datasets['samples'], to=AUGMENTED_IMAGES_DIR, n=32)
    sample_batch, _ = next(iter(dataloaders['samples']))
    save_image(make_grid(sample_batch, padding=0),
               os.path.join(AUGMENTED_IMAGES_DIR, 'grid.jpg'))

    criterion = nn.CrossEntropyLoss()

    optimizer = optim.SGD(model.parameters(),
                          lr=0.001,
                          momentum=0.9,
                          weight_decay=0.001)

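    # Unlike Example #2's fixed milestones, anneal the LR when the validation
    # loss plateaus (see scheduler.step(val_loss) in the loop below).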
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.1,
                                                     min_lr=1e-5,
                                                     patience=8)
    metrics = {
        'train': pd.DataFrame(columns=['epoch', 'loss', 'acc', 'auc']),
        'val': pd.DataFrame(columns=['epoch', 'loss', 'acc', 'auc'])
    }

    best_val_auc = 0.0
    best_epoch = 0
    epochs_without_improvement = 0
    if images_per_epoch:
        batches_per_epoch = images_per_epoch // batch_size
    else:
        batches_per_epoch = None

    for epoch in range(epochs):
        print('train epoch {}/{}'.format(epoch+1, epochs))
        epoch_train_result = train_epoch(
            device, model, dataloaders, criterion, optimizer,
            batches_per_epoch)

        # DataFrame.append was removed in pandas 2.0; concat a one-row frame.
        metrics['train'] = pd.concat(
            [metrics['train'],
             pd.DataFrame([{**epoch_train_result, 'epoch': epoch}])],
            ignore_index=True)
        print('train', epoch_train_result)

        epoch_val_result, _ = test_with_augmentation(
            model, datasets['val'], device, num_workers, val_samples)

        metrics['val'] = pd.concat(
            [metrics['val'],
             pd.DataFrame([{**epoch_val_result, 'epoch': epoch}])],
            ignore_index=True)
        print('val', epoch_val_result)
        print('-' * 40)

        scheduler.step(epoch_val_result['loss'])

        if epoch_val_result['auc'] > best_val_auc:
            best_val_auc = epoch_val_result['auc']
            best_val_result = epoch_val_result
            best_epoch = epoch
            epochs_without_improvement = 0
            torch.save(model, BEST_MODEL_PATH)
        else:
            epochs_without_improvement += 1

        if epochs_without_improvement > early_stopping_patience:
            last_val_result = epoch_val_result
            torch.save(model, LAST_MODEL_PATH)
            break

        if epoch == (epochs-1):
            last_val_result = epoch_val_result
            torch.save(model, LAST_MODEL_PATH)

    for phase in ['train', 'val']:
        metrics[phase].epoch = metrics[phase].epoch.astype(int)
        metrics[phase].to_csv(os.path.join(fs_observer.dir, phase + '.csv'),
                              index=False)

    # Run testing
    # TODO: reduce code repetition
    test_result, preds = test_with_augmentation(
        torch.load(BEST_MODEL_PATH), datasets['test'], device,
        num_workers, test_samples)
    print('[best] test', test_result)

    test_noaug_result, preds_noaug = test_with_augmentation(
        torch.load(BEST_MODEL_PATH), datasets['test_no_aug'], device,
        num_workers, 1)
    print('[best] test (no augmentation)', test_noaug_result)

    test_result_last, preds_last = test_with_augmentation(
        torch.load(LAST_MODEL_PATH), datasets['test'], device,
        num_workers, test_samples)
    print('[last] test', test_result_last)

    test_noaug_result_last, preds_noaug_last = test_with_augmentation(
        torch.load(LAST_MODEL_PATH), datasets['test_no_aug'], device,
        num_workers, 1)
    print('[last] test (no augmentation)', test_noaug_result_last)

    # Save predictions
    preds.to_csv(os.path.join(fs_observer.dir, 'test-aug-best.csv'),
                 index=False, columns=['image', 'label', 'score'])
    preds_noaug.to_csv(os.path.join(fs_observer.dir, 'test-noaug-best.csv'),
                       index=False, columns=['image', 'label', 'score'])
    preds_last.to_csv(os.path.join(fs_observer.dir, 'test-aug-last.csv'),
                      index=False, columns=['image', 'label', 'score'])
    preds_noaug_last.to_csv(os.path.join(fs_observer.dir,
                                         'test-noaug-last.csv'),
                            index=False, columns=['image', 'label', 'score'])

    # TODO: Avoid repetition.
    #       use ordereddict, or create a pandas df before saving
    with open(RESULTS_CSV_PATH, 'a') as file:
        file.write(','.join((
            EXP_NAME,
            str(EXP_ID),
            str(split_id),
            str(best_epoch),
            str(best_val_result['loss']),
            str(best_val_result['acc']),
            str(best_val_result['auc']),
            str(best_val_result['avp']),
            str(best_val_result['sens']),
            str(best_val_result['spec']),
            str(last_val_result['loss']),
            str(last_val_result['acc']),
            str(last_val_result['auc']),
            str(last_val_result['avp']),
            str(last_val_result['sens']),
            str(last_val_result['spec']),
            str(best_val_auc),
            str(test_result['auc']),
            str(test_result_last['auc']),
            str(test_result['acc']),
            str(test_result_last['acc']),
            str(test_result['spec']),
            str(test_result_last['spec']),
            str(test_result['sens']),
            str(test_result_last['sens']),
            str(test_result['avp']),
            str(test_result_last['avp']),
            str(test_noaug_result['auc']),
            str(test_noaug_result_last['auc']),
            str(test_noaug_result['acc']),
            str(test_noaug_result_last['acc']),
            str(test_noaug_result['spec']),
            str(test_noaug_result_last['spec']),
            str(test_noaug_result['sens']),
            str(test_noaug_result_last['sens']),
            str(test_noaug_result['avp']),
            str(test_noaug_result_last['avp']),
            )) + '\n')

    return (test_noaug_result['auc'],
            test_result['auc'],
            )
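
Regarding the TODO above: a minimal sketch of the pandas variant it suggests, so that column names travel with the values. append_results_row and the column names are hypothetical, for illustration only:

import os

import pandas as pd

def append_results_row(results_csv_path, row):
    # Append one experiment's metrics; write the header only if the file
    # does not exist yet.
    write_header = not os.path.exists(results_csv_path)
    pd.DataFrame([row]).to_csv(results_csv_path, mode='a',
                               header=write_header, index=False)

# Hypothetical usage with a few of the fields written above:
# append_results_row(RESULTS_CSV_PATH, {
#     'exp_name': EXP_NAME, 'exp_id': EXP_ID, 'split_id': split_id,
#     'best_epoch': best_epoch, 'best_val_auc': best_val_auc,
#     'test_auc_best': test_result['auc'],
#     'test_auc_last': test_result_last['auc'],
# })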