Example #1
def main(ckpt, tocsv=False, save=False, mask=False, target='test', toiou=False):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # load one or more checkpoints
    models = []
    for fn in ckpt or [None]:
        # load model
        model = load_ckpt(filepath=fn)
        if not model:
            print("Aborted: checkpoint {} not found!".format(fn))
            return
        # set the model to evaluation mode
        model.eval()
        # move the model to the GPU, using DataParallel when several are available
        if torch.cuda.device_count() > 1:
            print("Let's use", torch.cuda.device_count(), "GPUs!")
            model = nn.DataParallel(model)
        model = model.to(device)
        # append to model list
        models.append(model)

    resize = not config['valid'].getboolean('pred_orig_size')
    compose = Compose(augment=False, resize=resize)
    # decide which dataset to sample from
    data_dir = os.path.join('data', target)
    if target == 'test':
        dataset = KaggleDataset(data_dir, transform=compose)
    elif os.path.exists('data/valid'):
        # advanced mode: use the valid folder as the CV set
        dataset = KaggleDataset(data_dir, transform=compose)
    else:
        # auto mode: split part of the train dataset off as the CV set
        dataset = KaggleDataset('data/train', transform=compose, use_filter=True)
        if target == 'train':
            dataset, _ = dataset.split()
        elif target == 'valid':
            _, dataset = dataset.split()

    # iterate over the dataset and run inference on each sample
    ious = []
    writer = csvfile = None
    for data in tqdm(dataset):
        with torch.no_grad():
            uid, y, y_c, y_m = inference(data, models, resize)
            x, gt, gt_s, gt_c, gt_m = unpack_data(data, compose, resize)

        if tocsv:
            if writer is None:
                csvfile = open('result.csv', 'w')
                writer = csv.writer(csvfile)
                writer.writerow(['ImageId', 'EncodedPixels'])
            for rle in prob_to_rles(y, y_c, y_m):
                writer.writerow([uid, ' '.join([str(i) for i in rle])])
        elif toiou:
            assert target != 'test'
            if writer is None:
                csvfile = open('iou.csv', 'w')
                writer = csv.writer(csvfile)
                writer.writerow(['ImageId', 'IoU'])
            iou = get_iou(y, y_c, y_m, gt)
            writer.writerow([uid, iou])
            ious.append(iou)
        elif mask:
            save_mask(uid, y, y_c, y_m)
        elif target == 'test':
            show(uid, x, y, y_c, y_m, save)
        else: # train or valid
            show_groundtruth(uid, x, y, y_c, y_m, gt, gt_s, gt_c, gt_m, save)

    # end of for-loop
    if csvfile is not None:
        csvfile.close()
    if toiou:
        print('\nIoU Metrics:\n mean: {0:.4f}\t std: {1:.4f}\t max: {2:.4f}\t min: {3:.4f}\t count: {4}\n'
            .format(np.mean(ious), np.std(ious), np.max(ious), np.min(ious), len(ious)))
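
Note: `prob_to_rles` and `get_iou` are project-local helpers. For reference, the Kaggle 2018 Data Science Bowl expects run-length encodings that are 1-indexed and scanned column-major (top-to-bottom, then left-to-right). Below is a minimal sketch of such an encoder, assuming a single thresholded probability map and `skimage.measure.label` for instance splitting; `mask_to_rles` is a hypothetical name, and the real helper presumably also folds the auxiliary outputs `y_c` and `y_m` into the segmentation before encoding.

import numpy as np
from skimage.measure import label

def mask_to_rles(y, threshold=0.5):
    # split the thresholded map into connected-component instances
    labeled = label(y > threshold)
    for i in range(1, labeled.max() + 1):
        # scan in column-major (Fortran) order, as the Kaggle format requires
        pixels = (labeled == i).flatten(order='F')
        padded = np.concatenate([[0], pixels, [0]])
        # indices where the value changes, shifted to 1-based positions
        runs = np.where(padded[1:] != padded[:-1])[0] + 1
        runs[1::2] -= runs[::2]  # turn run-end positions into run lengths
        yield runs.tolist()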
Example #2
def main(resume=True, n_epoch=None, learn_rate=None):
    model_name = config['param']['model']
    if learn_rate is None:
        learn_rate = config['param'].getfloat('learn_rate')
    width = config.getint(model_name, 'width')
    weight_map = config['param'].getboolean('weight_map')
    c = config['train']
    log_name = c.get('log_name')
    n_batch = c.getint('n_batch')
    n_worker = c.getint('n_worker')
    n_cv_epoch = c.getint('n_cv_epoch')
    if n_epoch is None:
        n_epoch = c.getint('n_epoch')
    balance_group = c.getboolean('balance_group')
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model = build_model(model_name)
    model = model.to(device)

    # define optimizer
    optimizer = torch.optim.Adam(
        filter(lambda p: p.requires_grad, model.parameters()),
        lr=learn_rate,
        weight_decay=1e-6
        )

    # DataLoader workers are forked processes, so we need an IPC manager to keep the cache in a shared memory space
    manager = Manager()
    cache = manager.dict()
    compose = Compose()
    # prepare dataset
    if os.path.exists('data/valid'):
        # advanced mode: use the valid folder as the CV set
        train_dataset = KaggleDataset('data/train', transform=compose, cache=cache)
        valid_dataset = KaggleDataset('data/valid', transform=compose, cache=cache)
    else:
        # auto mode: split part of the train dataset off as the CV set
        train_dataset = KaggleDataset('data/train', transform=compose, cache=cache, use_filter=True)
        train_dataset, valid_dataset = train_dataset.split()
    # decide whether to balance training set
    if balance_group:
        weights, ratio = train_dataset.class_weight()
        # len(weights) equals the number of samples in one original epoch.
        # After oversampling for balance, majority-class samples are drawn relatively
        # less often, so multiply by ratio to give every sample a chance to be
        # visited at least once per epoch.
        sampler = WeightedRandomSampler(weights, int(len(weights) * ratio))
    else:
        sampler = RandomSampler(train_dataset)
    # data loader
    train_loader = DataLoader(
        train_dataset,
        sampler=sampler,
        batch_size=n_batch,
        num_workers=n_worker,
        pin_memory=torch.cuda.is_available())
    valid_loader = DataLoader(
        valid_dataset,
        shuffle=False,
        batch_size=n_batch,
        num_workers=n_worker)

    # resume checkpoint
    start_epoch = iou_tr = iou_cv = 0
    if resume:
        start_epoch = load_ckpt(model, optimizer)
    if start_epoch == 0:
        print('Grand new training ...')

    # wrap the model for multi-GPU training (it was moved to the device above)
    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        model = nn.DataParallel(model)

    # decide log directory name
    log_dir = os.path.join(
        'logs', log_name, '{}-{}'.format(model_name, width),
        'ep_{},{}-lr_{}'.format(
            start_epoch,
            n_epoch + start_epoch,
            learn_rate,
        )
    )

    with SummaryWriter(log_dir) as writer:
        if start_epoch == 0 and False:
            # dump the graph only for the very first training run; disabled by default
            dump_graph(model, writer, n_batch, width)
        print('Training started...')
        for epoch in range(start_epoch + 1, n_epoch + start_epoch + 1):  # epochs are 1-based
            iou_tr = train(train_loader, model, optimizer, epoch, writer)
            if len(valid_dataset) > 0 and epoch % n_cv_epoch == 0:
                with torch.no_grad():
                    iou_cv = valid(valid_loader, model, epoch, writer, len(train_loader))
            save_ckpt(model, optimizer, epoch, iou_tr, iou_cv)
        print('Training finished...')
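
Note: `KaggleDataset.class_weight()` is internal to the project. The sketch below shows one plausible inverse-frequency scheme consistent with the comments above; `class_weight` here is a hypothetical free function, and `groups` (one group label per sample) stands in for whatever grouping the dataset derives internally.

from collections import Counter
from torch.utils.data import WeightedRandomSampler

def class_weight(groups):
    counts = Counter(groups)
    largest = max(counts.values())
    # rare groups get proportionally higher draw probability
    weights = [largest / counts[g] for g in groups]
    # sizing one epoch at len(weights) * ratio = n_groups * largest draws makes the
    # expected number of visits of any sample largest / counts[g] >= 1, i.e. every
    # sample is likely seen at least once per epoch despite the oversampling
    ratio = len(counts) * largest / len(groups)
    return weights, ratio

# usage, mirroring the script above:
# weights, ratio = class_weight(groups)
# sampler = WeightedRandomSampler(weights, int(len(weights) * ratio))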
Example #3
def main(resume=True, n_epoch=None, learn_rate=None):
    model_name = config['param']['model']
    cv_ratio = config['param'].getfloat('cv_ratio')
    if learn_rate is None:
        learn_rate = config['param'].getfloat('learn_rate')
    width = config[model_name].getint('width')
    weight_map = config['param'].getboolean('weight_map')
    c = config['train']
    log_name = c.get('log_name')
    n_batch = c.getint('n_batch')
    n_worker = c.getint('n_worker')
    n_ckpt_epoch = c.getint('n_ckpt_epoch')
    if n_epoch is None:
        n_epoch = c.getint('n_epoch')

    # initialize model
    if model_name == 'unet_vgg16':
        model = UNetVgg16(3, 1, fixed_vgg=True)
    elif model_name == 'dcan':
        model = DCAN(3, 1)
    elif model_name == 'caunet':
        model = CAUNet()
    elif model_name == 'camunet':
        model = CAMUNet()
    else:
        model = UNet()

    if torch.cuda.is_available():
        model = model.cuda()
        # model = torch.nn.DataParallel(model).cuda()

    # define optimizer
    optimizer = torch.optim.Adam(
        filter(lambda p: p.requires_grad, model.parameters()),
        lr=learn_rate,
        weight_decay=1e-6
        )

    # DataLoader workers are forked processes, so we need an IPC manager to keep the cache in a shared memory space
    manager = Manager()
    cache = manager.dict()
    # prepare dataset and loader
    dataset = KaggleDataset('data/stage1_train', transform=Compose(), cache=cache)
    train_idx, valid_idx = dataset.split()
    train_loader = DataLoader(
        dataset, sampler=SubsetRandomSampler(train_idx),
        batch_size=n_batch,
        num_workers=n_worker,
        pin_memory=torch.cuda.is_available())
    valid_loader = DataLoader(
        dataset, sampler=SubsetRandomSampler(valid_idx),
        batch_size=n_batch,
        num_workers=n_worker)

    # resume checkpoint
    start_epoch = 0
    if resume:
        start_epoch = load_ckpt(model, optimizer)
    if start_epoch == 0:
        print('Grand new training ...')

    # decide log directory name
    log_dir = os.path.join(
        'logs', log_name, '{}-{}'.format(model_name, width),
        'ep_{},{}-lr_{}'.format(
            start_epoch,
            n_epoch + start_epoch,
            learn_rate,
        )
    )

    with SummaryWriter(log_dir) as writer:
        if start_epoch == 0 and False:
            # dump the graph only for the very first training run; disabled by default
            dump_graph(model, writer, n_batch, width)
        print('Training started...')
        for epoch in range(start_epoch, n_epoch + start_epoch):
            train(train_loader, model, optimizer, epoch, writer)
            if cv_ratio > 0 and epoch % 3 == 2:  # validate every 3rd epoch
                valid(valid_loader, model, epoch, writer, len(train_loader))
            # save a checkpoint every n_ckpt_epoch epochs
            if epoch % n_ckpt_epoch == n_ckpt_epoch - 1:
                save_ckpt(model, optimizer, epoch+1)
        print('Training finished...')
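
Note: `load_ckpt` and `save_ckpt` are project-local helpers. Below is a minimal sketch of the standard PyTorch state-dict pattern they presumably wrap; the path and checkpoint keys are hypothetical, and the signatures are matched to this example (Example #1 calls `load_ckpt(filepath=fn)` with a different signature that returns a model).

import os
import torch

CKPT_PATH = 'checkpoint/current.pth'  # hypothetical location

def save_ckpt(model, optimizer, epoch, filepath=CKPT_PATH):
    # unwrap nn.DataParallel so the checkpoint also loads on a single GPU
    net = model.module if hasattr(model, 'module') else model
    os.makedirs(os.path.dirname(filepath), exist_ok=True)
    torch.save({
        'epoch': epoch,
        'model': net.state_dict(),
        'optimizer': optimizer.state_dict(),
    }, filepath)

def load_ckpt(model, optimizer, filepath=CKPT_PATH):
    # return the epoch to resume from; 0 means train from scratch
    if not os.path.isfile(filepath):
        return 0
    ckpt = torch.load(filepath, map_location='cpu')
    model.load_state_dict(ckpt['model'])
    optimizer.load_state_dict(ckpt['optimizer'])
    return ckpt['epoch']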