Example 1
def test_loader_split():
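    # The fractional start/end arguments should yield ~80/20 train/val splits,
    # each covering fewer unique images than the full dataset.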
    full = MafatDataset('data/train.csv', 'data/answer.csv', 'data/training imagery', preload=True)
    train = MafatDataset('data/train.csv', 'data/answer.csv', 'data/training imagery', preload=True, start=0, end=0.8)
    val = MafatDataset('data/train.csv', 'data/answer.csv', 'data/training imagery', preload=True, start=0.8, end=1)
    assert len(np.unique(full.dat['image_id'])) > len(np.unique(train.dat['image_id']))
    assert len(np.unique(full.dat['image_id'])) > len(np.unique(val.dat['image_id']))
    assert 0.78 < float(len(train.dat))/len(full.dat) < 0.82
    assert 0.18 < float(len(val.dat))/len(full.dat) < 0.22
Example 2
def test_loader():
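    # Spot-check a few indices: each item is (image, label vector, text),
    # with either 37 classes or an empty label.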
    data = MafatDataset('data/train.csv', 'data/answer.csv', 'data/training imagery', preload=False, augment=False)
    print(len(data))
    for i in [3977, 49, 300, 500]:
        im, l, text = data[i]
        print('label', l)
        print(text)
        assert len(l) == 37 or len(l) == 0
        if DISPLAY:
            plt.imshow(im[0], cmap='gray')
            plt.title('%d' % i)
            plt.show()
Example 3
def test_loader_mask():
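    # Same spot-check as test_loader, but with detection masking enabled
    # and 224px patches.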
    create_config_file('data/train.csv')
    dataset_args = dict(mask_detection=True, boarder_ratio=5, patch_size=224)
    data = MafatDataset('data/train.csv', 'data/answer.csv', 'data/training imagery', preload=False, **dataset_args)
    print(len(data))
    for i in [3978, 47, 301, 530, 100, 410]:
        im, l, text = data[i]
        print('label', l)
        print(text)
        assert len(l) == 37 or len(l) == 0
        if DISPLAY:
            plt.imshow(im[0], cmap='gray')
            plt.title('%d' % i)
            plt.show()
Example 4
def test_loader_split_byoverlap():
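    # Splitting by image-overlap groups (train_pairs.yaml) should still
    # produce roughly 80/20 proportions.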
    full = MafatDataset('data/train.csv', 'data/answer.csv', 'data/training imagery', preload=True)
    train, val = create_train_val_dataset('data/train.csv', 'data/answer.csv', 'data/training imagery', image_group_file='data/train_pairs.yaml', preload=True)
    assert len(np.unique(full.dat['image_id'])) > len(np.unique(train.dat['image_id']))
    assert len(np.unique(full.dat['image_id'])) > len(np.unique(val.dat['image_id']))
    assert 0.78 < float(len(train.dat))/len(full.dat) < 0.82
    assert 0.18 < float(len(val.dat))/len(full.dat) < 0.22
Example 5
def init_dataset():
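    # Build dataset kwargs from the CLI flags, then return either a single
    # dataset used for both train and val (--no_split) or a proper split.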
    print('loading Dataset')
    dataset_args = {}
    if args.context:
        dataset_args.update(
            dict(mask_detection=True, boarder_ratio=5, patch_size=224))
    if args.normalize_size:
        dataset_args.update(dict(normalize_size=True))
    if args.normalize_rotation:
        dataset_args.update(dict(normalize_rotation=True))
    if args.no_augment:
        dataset_args.update(dict(augment=False))

    if args.no_split:
        train_dataset = MafatDataset('data/train.csv', 'data/answer.csv',
                                     'data/training imagery', args.preload,
                                     **dataset_args)
        val_dataset = train_dataset
    else:
        train_dataset, val_dataset = create_train_val_dataset(
            'data/test.csv',
            'data/answer.csv',
            'data/test imagery',
            image_group_file=args.image_group_file,
            preload=args.preload,
            **dataset_args)
    return train_dataset, val_dataset
Example 6
def evaluate():
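    # Run the model over the test set, collect per-class sigmoid scores into
    # a ranked answer CSV, and log a 4x4 preview grid of the last batch.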
    output_file = 'answer_%s.csv' % args.tag
    model, _, _ = load_model()
    model.eval()
    dataset_args = {}
    if args.context:
        dataset_args.update(
            dict(mask_detection=True, boarder_ratio=5, patch_size=224))
    if args.normalize_size:
        dataset_args.update(dict(normalize_size=True))
    if args.normalize_rotation:
        dataset_args.update(dict(normalize_rotation=True))
    dataset = MafatDataset('data/test.csv',
                           'data/answer.csv',
                           'data/test imagery',
                           preload=args.preload,
                           augment=False,
                           **dataset_args)
    writer = SummaryWriter('runs/%s/%s' % (args.architect, args.tag))
    train_loader = torch.utils.data.DataLoader(dataset,
                                               batch_size=args.batch_size,
                                               shuffle=False,
                                               num_workers=args.workers,
                                               collate_fn=collate_fn)
    sigmoid = nn.Sigmoid()
    collector = PredictionCollector(dataset.get_class_names())
    with torch.no_grad():
        for it, data in enumerate(train_loader):
            images, labels, gt_text = data
            X = Variable(images).cuda()
            outputs = model(X)
            prediction = sigmoid(outputs)
            ids = [int(text.split(',')[0]) for text in gt_text]
            collector.add(ids, prediction)

        # build a preview grid of predictions for the last batch
        pred_text = list(map(dataset.labels_to_text,
                             prediction.detach().cpu().numpy()[:16]))
        grid = display_images(X[:16], gt_text[:16], pred_text[:16], nrow=4)
        cv2.imwrite('test_grid.png', grid.numpy()[0])
        writer.add_image('Test/images', grid, it)
        collector.save(output_file)
Example 7
def test_collector():
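    # Push ground-truth labels through the collector as if they were
    # predictions, then sanity-check the mAP computation and the ranked CSV.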
    dataset = MafatDataset('data/train.csv', 'data/answer.csv', 'data/training imagery', preload=False)
    loader = torch.utils.data.DataLoader(dataset, batch_size=8, shuffle=False, num_workers=0)
    collector = PredictionCollector(dataset.get_class_names(), True)
    for data in loader:
        images, labels, gt_text = data
        labels = labels.detach().numpy()
        ids = [int(text.split(',')[0]) for text in gt_text]
        collector.add(ids, labels, labels)
        assert collector.output['dedicated agricultural vehicle'][24690]==1
        break
    assert len(collector.output) == 8
    map_score, keys = collector.calc_map()
    print('map', map_score)
    by_prob = collector.save('data/answer_v0.csv')
    assert np.all(by_prob['small vehicle']!=by_prob['large vehicle'])
    assert np.all([len(np.unique(by_prob[key])) == by_prob.shape[0] for key in by_prob.keys()])
    print(by_prob['dedicated agricultural vehicle'])
    assert by_prob['dedicated agricultural vehicle'].iloc[0]==24690
    print(by_prob)
Example 8
    best_inliers, best_ind = 0, -1
    for i, m in enumerate(matches):
        inliers = np.sum([  # matches whose (dx, dy) shift agrees with m
            np.abs(m2.dx - m.dx) < 10 and np.abs(m2.dy - m.dy) < 5
            for m2 in matches
        ])
        if inliers > best_inliers:
            best_inliers = inliers
            best_ind = i

    if best_inliers >= 2:
        print('found matching images')
        return matches[best_ind].dx, matches[best_ind].dy

    return None


dataset = MafatDataset('data/train.csv',
                       'data/answer.csv',
                       'data/training imagery',
                       preload=False)
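# Reduce every detection to its feature/label vector so that identical
# detections can be collapsed to a single integer code below.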
train = dataset.dat
print(np.unique(train[features_keys]))

if os.path.exists('/tmp/features.npy'):
    print('reading from file')
    features_ = np.load('/tmp/features.npy')
else:
    features = [dataset.row_to_label(train.iloc[i]) for i in range(len(train))]
    features_ = np.array(features).T
    np.save('/tmp/features.npy', features_)

# every detection is now reduced to a single index "encoding"; val holds the unique feature vectors, but we don't care about those
val, encoding = np.unique(features_, axis=1, return_inverse=True)
for i, r in enumerate(encoding):
Example 9
def test_loader_weights():
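    # Per-sample weights must align one-to-one with the dataset items.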
    full = MafatDataset('data/train.csv', 'data/answer.csv', 'data/training imagery', preload=False)
    w, c = full.get_weights()
    print(w)
    assert len(w) == len(full)
Example 10
import argparse

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from torchvision import transforms
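
# Show the top-ranked test images for one category, as ordered by an
# answer (ranking) CSV.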

parser = argparse.ArgumentParser(description='PyTorch ImageNet Training')
parser.add_argument('--category', default='color', help='what class/category')
parser.add_argument('--answer', help='answer csv file (ranking)')

parser.add_argument('-n',
                    default=1,
                    type=int,
                    help='how many examples to show. -1 to show all')
args = parser.parse_args()
dataset = MafatDataset('data/test.csv',
                       'data/answer.csv',
                       'data/test imagery',
                       False,
                       resize=False,
                       patch_size=224,
                       augment=False)
dat = pd.read_csv(args.answer)

ids = dat[args.category][:args.n]
trans = transforms.ToPILImage()

ims = [
    np.array(trans(dataset[np.where(dataset.dat['tag_id'] == i)[0][0]][0]))
    for i in ids
]
n = int(np.sqrt(len(ims)))