def predict_test_majority():
    """Majority voting method.

    Runs every model in ``models`` over the test set under each test-time
    transform, averages the per-model probabilities, dumps them to disk,
    thresholds them into per-model binary votes, and majority-votes across
    models into a single submission CSV.
    """
    votes = np.empty((len(models), 61191, 17))
    for model_idx, model_fn in enumerate(models):
        model_name = str(model_fn).split()[1]
        print('predicting model {}'.format(model_name))
        net = nn.DataParallel(model_fn().cuda())
        state = torch.load(
            '/mnt/home/dunan/Learn/Kaggle/planet_amazon/model/full_data_{}_10xlr.pth'
            .format(model_name))
        net.load_state_dict(state)
        net.eval()
        probs = np.zeros((61191, 17))
        # NOTE(review): each transform is applied on top of the previously
        # transformed images (the dataset is mutated in place), so transforms
        # compound — confirm this is intentional for the TTA scheme used here.
        for tta in transforms:
            test_dataloader.dataset.images = tta(test_dataloader.dataset.images)
            print(tta, model_name)
            probs += predict(net, dataloader=test_dataloader)
        probs /= len(transforms)
        np.savetxt(
            '/mnt/home/dunan/Learn/Kaggle/planet_amazon/submission_probs/full_data_{}_10xlr_224and256.txt'
            .format(model_name), probs)
        # Per-model binary votes from the model-specific threshold.
        votes[model_idx] = (probs > thresholds[model_name]).astype(int)
    # A label is kept when at least half the models (rounded down) voted for it.
    tally = votes.sum(axis=0)
    final = (tally >= (len(models) // 2)).astype(int)
    pred_csv(predictions=final,
             name='majority_voting_ensembles_split_data_10xlr_224and256')
def predict_test_averaging(t):
    """Average predicted probabilities over every model and every test-time
    transform, then write one thresholded submission CSV.

    Args:
        t: probability threshold forwarded to ``pred_csv``.
    """
    summed = np.zeros((61191, 17))
    for model_fn in models:
        model_name = str(model_fn).split()[1]
        net = nn.DataParallel(model_fn().cuda())
        net.load_state_dict(torch.load(
            '/mnt/home/dunan/Learn/Kaggle/planet_amazon/model/{}.pth'
            .format(model_name)))
        net.eval()
        # NOTE(review): the dataset images are mutated in place, so each
        # transform is applied on top of the previous one — confirm intentional.
        for tta in transforms:
            test_dataloader.dataset.images = tta(test_dataloader.dataset.images)
            summed += predict(dataloader=test_dataloader, net=net)
    averaged = summed / (len(models) * len(transforms))
    pred_csv(predictions=averaged, threshold=t,
             name='transforms-res50-152dense-ensembels')
def test():
    """Predict the test split with a single DenseNet-161 and write a
    thresholded submission CSV."""
    model = nn.DataParallel(densenet161().cuda())
    model.load_state_dict(torch.load('models/densenet161.pth'))
    model.eval()
    test_set = KgForestDataset(
        split='test-61191',
        transform=Compose([
            Lambda(lambda x: toTensor(x)),
            Normalize(mean=mean, std=std),
        ]),
        height=256,
        width=256,
        label_csv=None,
    )
    loader = DataLoader(test_set, batch_size=512, shuffle=False,
                        pin_memory=True)
    probs = predict(model, loader)
    pred_csv(probs, name='densenet161', threshold=BEST_THRESHOLD)
def test_majority_blender():
    """Sum binary label votes from all 'blender' dumps in
    ``submission_probs`` and keep labels reaching the vote threshold."""
    blender_files = [
        path for path in glob.glob('submission_probs/*.txt')
        if 'blender' in path
    ]
    tally = np.zeros((61191, 17))
    for path in blender_files:
        tally += np.loadtxt(path)
    # NOTE(review): the vote threshold is len(transforms), not the number of
    # blender files — confirm the two are meant to coincide here.
    final = (tally >= len(transforms)).astype(int)
    pred_csv(final, 'blender')
def predict_test(t):
    """Average raw predictions across all models (no test-time transforms)
    and write one thresholded submission CSV.

    Args:
        t: probability threshold forwarded to ``pred_csv``.
    """
    summed = np.zeros((61191, 17))
    for model_fn in models:
        model_name = str(model_fn).split()[1]
        net = nn.DataParallel(model_fn().cuda())
        net.eval()
        net.load_state_dict(torch.load('models/{}.pth'.format(model_name)))
        summed += predict(dataloader=test_dataloader, net=net)
    pred_csv(predictions=summed / len(models), threshold=t, name='ensembles')
def predict_test_majority():
    """Majority voting method.

    Loads pre-computed per-model binary votes from ``submission_preds`` and
    combines them into one submission via majority vote.
    """
    votes = np.empty((len(models), 61191, 17))
    for model_idx, model_fn in enumerate(models):
        print('Loading name {}'.format(model_fn))
        model_name = str(model_fn).split()[1]
        votes[model_idx] = np.loadtxt(
            'submission_preds/full_data_{}.txt'.format(model_name))
    tally = np.sum(votes, axis=0)
    if len(models) % 2 == 0:
        # Even ensemble: require a strict majority (ties rejected).
        final = (tally > (len(models) // 2)).astype(int)
    elif len(models) == 1:
        # Single model: its votes pass through unchanged.
        final = tally
    else:
        # Odd ensemble: at least floor(n/2) votes.
        # NOTE(review): for n=3 this keeps any label with a single vote —
        # confirm this loose threshold is intentional.
        final = (tally >= (len(models) // 2)).astype(int)
    pred_csv(predictions=final, name='resnet101_resnet152_densenets')
def predict_test_majority():
    """Majority voting method.

    Loads pre-computed per-model probability/vote dumps from the 10x-lr
    single-model runs and combines them into one submission.
    """
    votes = np.empty((len(models), 61191, 17))
    for model_idx, model_fn in enumerate(models):
        print('Loading name {}'.format(model_fn))
        model_name = str(model_fn).split()[1]
        votes[model_idx] = np.loadtxt(
            '/mnt/home/dunan/Learn/Kaggle/planet_amazon/submission_probs/full_data_{}_single_10xlr_224and256.txt'
            .format(model_name))
    tally = np.sum(votes, axis=0)
    if len(models) % 2 == 0:
        # Even ensemble: require a strict majority (ties rejected).
        final = (tally > (len(models) // 2)).astype(int)
    elif len(models) == 1:
        # Single model: its votes pass through unchanged.
        final = tally
    else:
        # Odd ensemble: at least floor(n/2) votes.
        final = (tally >= (len(models) // 2)).astype(int)
    pred_csv(predictions=final, name='resnet50152_densenet161169201')
def predict_test_averaging(t):
    """Average probabilities over every model and transform, dump the raw
    averages to disk, then write one thresholded submission CSV.

    Args:
        t: probability threshold forwarded to ``pred_csv``.
    """
    summed = np.zeros((61191, 17))
    for model_fn in models:
        model_name = str(model_fn).split()[1]
        net = nn.DataParallel(model_fn().cuda())
        net.load_state_dict(torch.load('models/{}.pth'.format(model_name)))
        net.eval()
        # NOTE(review): dataset images are mutated in place, so transforms
        # compound on each other — confirm intentional.
        for tta in transforms:
            test_dataloader.dataset.images = tta(test_dataloader.dataset.images)
            summed += predict(dataloader=test_dataloader, net=net)
    averaged = summed / (len(models) * len(transforms))
    np.savetxt('submission_probs/fpn_152', averaged)
    pred_csv(predictions=averaged, threshold=t, name='fpn-152')
def predict_test(t):
    """Average predicted probabilities over every model and every test-time
    transform, then write one thresholded submission CSV.

    Args:
        t: probability threshold forwarded to ``pred_csv``.
    """
    summed = np.zeros((61191, 17))
    for model_fn in models:
        model_name = str(model_fn).split()[1]
        net = nn.DataParallel(model_fn().cuda())
        net.load_state_dict(torch.load('models/{}.pth'.format(model_name)))
        net.eval()
        # NOTE(review): dataset images are mutated in place, so transforms
        # compound on each other — confirm intentional.
        for tta in transforms:
            test_dataloader.dataset.images = tta(test_dataloader.dataset.images)
            summed += predict(dataloader=test_dataloader, net=net)
    averaged = summed / (len(models) * len(transforms))
    pred_csv(predictions=averaged, threshold=t,
             name='transforms-resnet152_densenet161_densent169-ensembels')
0.22366667, 0.20366667, 0.17166667, 0.14633333, 0.20066667, 0.18966667, 0.197, 0.20166667, 0.17233333, 0.21466667, 0.15566667, 0.197, 0.16366667, 0.149, 0.25366667, 0.18333333, 0.26033333 ] ] models = [ resnet18_planet, resnet34_planet, resnet50_planet, resnet152_planet, densenet121, densenet161, densenet169, ] labels = np.empty((len(models), 61191, 17)) for m_idx, model in enumerate(models): name = str(model).split()[1] preds = np.loadtxt( '/mnt/home/dunan/Learn/Kaggle/planet_amazon/submission_probs/full_data_{}.txt' .format(name)) # get labels preds = (preds > thresholds[m_idx]).astype(int) labels[m_idx] = preds # majority voting labels = labels.sum(axis=0) labels = (labels >= (len(models) // 2)).astype(int) pred_csv(predictions=labels, name='majority_voting_ensembles_full_data')
import pandas as pd
import numpy as np
import glob

from data import kgdataset
from data.kgdataset import KgForestDataset

kgdataset.KAGGLE_DATA_DIR = '../../../kaggle'

from util import name_idx, pred_csv

# Majority-vote ensemble of previously generated submission CSVs:
# each CSV's tag strings are converted back to binary label rows, then a
# label is kept when at least half of the submissions voted for it.
filenames = glob.glob('submissions/*.csv')
print('\n'.join(filenames))

# Read all csv files and convert tags to labels.
dfs = [pd.read_csv(name) for name in filenames]
# FIX: size the vote tensor by the number of files actually found instead of
# the hard-coded 8, so a different glob result no longer breaks the indexing
# below (or leaves uninitialised slabs in the vote sum).
labels = np.empty((len(dfs), 61191, 17))
tag_to_idx = name_idx()  # hoisted: was rebuilt for every tag of every row
for df_idx, df in enumerate(dfs):
    print(df_idx)
    for row, tag in enumerate(df['tags']):
        label = np.zeros(17)
        idx = [tag_to_idx[name] for name in tag.split(' ')]
        label[idx] = 1
        labels[df_idx, row, :] = label

# Keep a label when at least half the submissions voted for it
# (>= 4 of 8 in the original run, so behavior is unchanged there).
majority_voting = labels.sum(axis=0)
majority_voting = (majority_voting >= max(1, len(dfs) // 2)).astype(int)
pred_csv(majority_voting, name='sub_ensembles')