def predict_test_majority():
    """
    Majority voting method.

    For each model: average the predicted probabilities over all test-time
    transforms, save the averaged probabilities to disk, threshold them into
    hard 0/1 labels, then keep a label when strictly more than half of the
    models voted for it.
    """
    labels = np.empty((len(models), 61191, 17))
    for m_idx, model in enumerate(models):
        name = str(model).split()[1]
        print('predicting model {}'.format(name))
        net = nn.DataParallel(model().cuda())
        net.load_state_dict(
            torch.load(
                '/mnt/home/dunan/Learn/Kaggle/planet_amazon/model/full_data_{}_10xlr.pth'
                .format(name)))
        net.eval()
        preds = np.zeros((61191, 17))
        for t in transforms:
            # NOTE(review): this mutates the dataset images in place, so the
            # transforms are applied cumulatively rather than each to the raw
            # images — confirm this is intended (a commented-out .copy() exists
            # in the sibling predict_test_averaging).
            test_dataloader.dataset.images = t(test_dataloader.dataset.images)
            print(t, name)
            pred = predict(net, dataloader=test_dataloader)
            preds = preds + pred
        # average probabilities over the test-time transforms
        preds = preds / len(transforms)
        np.savetxt(
            '/mnt/home/dunan/Learn/Kaggle/planet_amazon/submission_probs/full_data_{}_10xlr_224and256.txt'
            .format(name), preds)
        # threshold into hard per-model labels using the model's own threshold
        preds = (preds > thresholds[name]).astype(int)
        labels[m_idx] = preds

    # majority voting: require strictly more than half of the models to agree.
    # (The original `>= len(models) // 2` accepted a minority, e.g. 1 of 3.)
    labels = labels.sum(axis=0)
    labels = (labels > (len(models) // 2)).astype(int)
    pred_csv(predictions=labels,
             name='majority_voting_ensembles_split_data_10xlr_224and256')
def predict_test_averaging(t):
    """Average probabilities over every (model, transform) pair and submit.

    ``t`` is the probability threshold forwarded to ``pred_csv``.
    """
    accumulated = np.zeros((61191, 17))
    # iterate over the ensemble members
    for model_fn in models:
        model_name = str(model_fn).split()[1]
        net = nn.DataParallel(model_fn().cuda())
        checkpoint = torch.load(
            '/mnt/home/dunan/Learn/Kaggle/planet_amazon/model/{}.pth'.
            format(model_name))
        net.load_state_dict(checkpoint)
        net.eval()
        # accumulate one prediction per test-time transform
        for tta in transforms:
            test_dataloader.dataset.images = tta(
                test_dataloader.dataset.images)
            accumulated = accumulated + predict(dataloader=test_dataloader,
                                                net=net)

    averaged = accumulated / (len(models) * len(transforms))
    pred_csv(predictions=averaged,
             threshold=t,
             name='transforms-res50-152dense-ensembels')
# Ejemplo n.º 3  (scraped listing separator — not code)
# 0
def test():
    """Run densenet161 on the 61191-image test split and write a submission."""
    net = nn.DataParallel(densenet161().cuda())
    net.load_state_dict(torch.load('models/densenet161.pth'))
    net.eval()

    # tensor conversion + normalization only; no augmentation at test time
    test_transform = Compose([
        Lambda(lambda x: toTensor(x)),
        Normalize(mean=mean, std=std)
    ])
    dataset = KgForestDataset(split='test-61191',
                              transform=test_transform,
                              height=256,
                              width=256,
                              label_csv=None)
    test_loader = DataLoader(dataset,
                             batch_size=512,
                             shuffle=False,
                             pin_memory=True)

    probs = predict(net, test_loader)
    pred_csv(probs, name='densenet161', threshold=BEST_THRESHOLD)
# Ejemplo n.º 4  (scraped listing separator — not code)
# 0
def test_majority_blender():
    """Combine the saved blender vote files into a single submission.

    Sums every ``submission_probs/*blender*.txt`` matrix (presumably
    per-transform vote counts — TODO confirm against the writer side) and
    keeps a label when it collected at least ``len(transforms)`` votes.
    """
    label_files = [f for f in glob.glob('submission_probs/*.txt') if 'blender' in f]
    votes = np.zeros((61191, 17))
    for label_file in label_files:
        votes = votes + np.loadtxt(label_file)
    label = (votes >= len(transforms)).astype(int)
    # keyword form for consistency with every other pred_csv call site
    # (positional 'blender' relied on the second parameter being `name`)
    pred_csv(label, name='blender')
def predict_test(t):
    """Average each model's probabilities and write the ensemble submission."""
    summed = np.zeros((61191, 17))
    for model_fn in models:
        model_name = str(model_fn).split()[1]
        net = nn.DataParallel(model_fn().cuda())
        net.eval()
        net.load_state_dict(torch.load('models/{}.pth'.format(model_name)))
        summed = summed + predict(dataloader=test_dataloader, net=net)

    averaged = summed / len(models)
    pred_csv(predictions=averaged, threshold=t, name='ensembles')
def predict_test_majority():
    """
    Majority voting method.

    Loads each model's pre-computed hard labels from
    ``submission_preds/full_data_{name}.txt``, sums the votes across models
    and keeps a label when strictly more than half of the models agree.
    """
    labels = np.empty((len(models), 61191, 17))
    for m_idx, model in enumerate(models):
        print('Loading name {}'.format(model))
        name = str(model).split()[1]
        # files are expected to hold 0/1 label matrices of shape (61191, 17)
        preds = np.loadtxt('submission_preds/full_data_{}.txt'.format(name))
        labels[m_idx] = preds
    votes = np.sum(labels, axis=0)
    # Strict majority works uniformly for any model count:
    #   n = 1 -> > 0;  n = 3 -> >= 2;  n = 4 -> >= 3.
    # (The original odd-count branch used `>= n // 2`, which accepted a
    # minority, e.g. 1 vote out of 3, and the n == 1 branch returned floats.)
    labels = (votes > (len(models) // 2)).astype(int)

    pred_csv(predictions=labels, name='resnet101_resnet152_densenets')
# Ejemplo n.º 7  (scraped listing separator — not code)
# 0
def predict_test_majority():
    """
    Majority voting method.

    Loads each model's pre-computed hard labels from the saved
    ``full_data_{name}_single_10xlr_224and256.txt`` files, sums the votes and
    keeps a label when strictly more than half of the models agree.
    """
    labels = np.empty((len(models), 61191, 17))
    for m_idx, model in enumerate(models):
        print('Loading name {}'.format(model))
        name = str(model).split()[1]
        # files are expected to hold 0/1 label matrices of shape (61191, 17)
        preds = np.loadtxt('/mnt/home/dunan/Learn/Kaggle/planet_amazon/submission_probs/full_data_{}_single_10xlr_224and256.txt'.format(name))
        labels[m_idx] = preds
    votes = np.sum(labels, axis=0)
    # Strict majority works uniformly for any model count:
    #   n = 1 -> > 0;  n = 3 -> >= 2;  n = 4 -> >= 3.
    # (The original odd-count branch used `>= n // 2`, which accepted a
    # minority, e.g. 1 vote out of 3, and the n == 1 branch returned floats.)
    labels = (votes > (len(models) // 2)).astype(int)

    pred_csv(predictions=labels, name='resnet50152_densenet161169201')
def predict_test_averaging(t):
    """Average probabilities over all (model, transform) pairs, save, submit.

    ``t`` is the probability threshold forwarded to ``pred_csv``.
    """
    running = np.zeros((61191, 17))
    for model_fn in models:
        model_name = str(model_fn).split()[1]
        net = nn.DataParallel(model_fn().cuda())
        net.load_state_dict(torch.load('models/{}.pth'.format(model_name)))
        net.eval()
        # one prediction per test-time transform
        for tta in transforms:
            test_dataloader.dataset.images = tta(
                test_dataloader.dataset.images)
            running = running + predict(dataloader=test_dataloader, net=net)

    averaged = running / (len(models) * len(transforms))
    np.savetxt('submission_probs/fpn_152', averaged)
    pred_csv(predictions=averaged, threshold=t, name='fpn-152')
# Ejemplo n.º 9  (scraped listing separator — not code)
# 0
def predict_test(t):
    """Average probabilities over every (model, transform) pair and submit.

    ``t`` is the probability threshold forwarded to ``pred_csv``.
    """
    total = np.zeros((61191, 17))
    for model_fn in models:
        model_name = str(model_fn).split()[1]
        net = nn.DataParallel(model_fn().cuda())
        net.load_state_dict(torch.load('models/{}.pth'.format(model_name)))
        net.eval()
        # accumulate one prediction per test-time transform
        for tta in transforms:
            test_dataloader.dataset.images = tta(
                test_dataloader.dataset.images)
            total = total + predict(dataloader=test_dataloader, net=net)

    mean_probs = total / (len(models) * len(transforms))
    pred_csv(predictions=mean_probs,
             threshold=t,
             name='transforms-resnet152_densenet161_densent169-ensembels')
# Ejemplo n.º 10  (scraped listing separator — not code)
# 0
        0.22366667, 0.20366667, 0.17166667, 0.14633333, 0.20066667, 0.18966667,
        0.197, 0.20166667, 0.17233333, 0.21466667, 0.15566667, 0.197,
        0.16366667, 0.149, 0.25366667, 0.18333333, 0.26033333
    ]
]

# Model constructors to ensemble; each entry is a callable that builds a
# fresh network whose trained weights are loaded from disk below.
models = [
    resnet18_planet,
    resnet34_planet,
    resnet50_planet,
    resnet152_planet,
    densenet121,
    densenet161,
    densenet169,
]

# Build one (61191, 17) hard-label matrix per model from its saved
# probability file, then majority-vote across models.
labels = np.empty((len(models), 61191, 17))
for m_idx, model in enumerate(models):
    name = str(model).split()[1]
    preds = np.loadtxt(
        '/mnt/home/dunan/Learn/Kaggle/planet_amazon/submission_probs/full_data_{}.txt'
        .format(name))
    # threshold the probabilities into hard 0/1 labels
    preds = (preds > thresholds[m_idx]).astype(int)
    labels[m_idx] = preds

# majority voting: keep a label only when strictly more than half of the
# models voted for it. (The original `>= len(models) // 2` kept labels that
# only 3 of the 7 models voted for — not a majority.)
labels = labels.sum(axis=0)
labels = (labels > (len(models) // 2)).astype(int)
pred_csv(predictions=labels, name='majority_voting_ensembles_full_data')
# Ejemplo n.º 11  (scraped listing separator — not code)
# 0
import pandas as pd
import numpy as np
import glob
from data import kgdataset
from data.kgdataset import KgForestDataset

kgdataset.KAGGLE_DATA_DIR = '../../../kaggle'
from util import name_idx, pred_csv

filenames = glob.glob('submissions/*.csv')
print('\n'.join(filenames))

# read all csv files and convert each row's space-separated tags into a
# 17-way binary label vector
dfs = [pd.read_csv(name) for name in filenames]
# Size the vote tensor by the number of files actually found instead of the
# hard-coded 8, and zero-initialize it: with np.empty, a glob that finds
# fewer files would leave uninitialized garbage rows in the vote sum.
labels = np.zeros((len(dfs), 61191, 17))
for df_idx, df in enumerate(dfs):
    print(df_idx)
    for row, tag in enumerate(df['tags']):
        label = np.zeros(17)
        idx = [name_idx()[name] for name in tag.split(' ')]
        label[idx] = 1
        labels[df_idx, row, :] = label

# majority vote: at least half of the submissions (rounded up) must agree;
# for the original 8 files this is the same `>= 4` threshold.
majority_voting = labels.sum(axis=0)
majority_voting = (majority_voting >= (len(dfs) + 1) // 2).astype(int)

pred_csv(majority_voting, name='sub_ensembles')