# organize data
dataset_dict = {
    k: {'data': torch.tensor(v.data), 'targets': torch.tensor(v.targets)}
    for k, v in [('train', train_dataset), ('valid', valid_dataset)]
}

# move data to GPU
dataset_dict = bot.map_nested(bot.to(device), dataset_dict)
print('=====> Data moved to GPU')

# preprocess data on GPU
train_set = bot.preprocess(dataset_dict['train'], [
    bot.partial(bot.pad, border=4),
    bot.transpose,
    bot.normalise,
    bot.to(torch.float16),
])
valid_set = bot.preprocess(dataset_dict['valid'],
                           [bot.transpose, bot.normalise, bot.to(torch.float16)])

if args.use_subset:
    # use only a subset of the data (10%)
    train_set['data'], train_set['targets'] = bot.get_subset(train_set, 0.1)
    valid_set['data'], valid_set['targets'] = bot.get_subset(valid_set, 0.1)

print('=====> Data preprocessed (on GPU)')

# create batching lambda function
train_batches = bot.partial(bot.Batches, dataset=train_set, shuffle=True, drop_last=True, max_options=200)
valid_batches = bot.partial(bot.Batches, dataset=valid_set, shuffle=False, drop_last=False)
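# A minimal sketch of the semantics assumed for bot.preprocess above: apply
# each transform in order to the set's 'data' tensor. This is an illustration
# of the pattern, not bot's actual implementation.
def preprocess_sketch(dataset, transforms):
    data = dataset['data']
    for transform in transforms:
        data = transform(data)  # e.g. pad -> transpose -> normalise -> float16
    return {'data': data, 'targets': dataset['targets']}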
# organize data
dataset_dict = {
    k: {'data': torch.tensor(v.data), 'targets': torch.tensor(v.targets)}
    for k, v in [('train', train_dataset), ('valid', valid_dataset)]
}

# move data to GPU
dataset_dict = bot.map_nested(bot.to(device), dataset_dict)
print('=====> Data moved to GPU')

# get data statistics for normalizing data
mean = tuple(np.mean(train_dataset.data, axis=(0, 1, 2)))
std = tuple(np.std(train_dataset.data, axis=(0, 1, 2)))
mean, std = [torch.tensor(x, device=device, dtype=torch.float16) for x in (mean, std)]
normalize = lambda data, mean=mean, std=std: (data - mean) / std

# preprocess data on GPU
train_set = bot.preprocess(dataset_dict['train'], [
    bot.partial(bot.pad, border=4),
    bot.transpose,
    bot.normalise,
    bot.to(torch.float16),
])
valid_set = bot.preprocess(dataset_dict['valid'],
                           [bot.transpose, normalize, bot.to(torch.float16)])

if args.use_subset:
    # use only a subset of the data (10%)
    train_set['data'], train_set['targets'] = bot.get_subset(train_set, 0.1)
    valid_set['data'], valid_set['targets'] = bot.get_subset(valid_set, 0.1)

print('=====> Data preprocessed (on GPU)')

# create batching lambda function
train_batches = bot.partial(bot.Batches, dataset=train_set, shuffle=True, drop_last=True, max_options=200)
valid_batches = bot.partial(bot.Batches, dataset=valid_set, shuffle=False, drop_last=False)
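# The partials above delay construction so the batch size can be chosen at
# call time. A generic sketch of the same pattern; the yielded dict keys and
# any bot.Batches arguments beyond the bound keywords are assumptions, not
# shown in this file:
def batches_sketch(batch_size, dataset, shuffle=True, drop_last=False):
    n = dataset['data'].size(0)
    order = torch.randperm(n) if shuffle else torch.arange(n)
    end = n - n % batch_size if drop_last else n
    for i in range(0, end, batch_size):
        idx = order[i:i + batch_size]
        yield {'input': dataset['data'][idx], 'target': dataset['targets'][idx]}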
# get data statistics for normalizing data
mean = tuple(np.mean(test_dataset.data, axis=(0, 1, 2)))
std = tuple(np.std(test_dataset.data, axis=(0, 1, 2)))
mean, std = [torch.tensor(x, device=device, dtype=torch.float16) for x in (mean, std)]
normalize = lambda data, mean=mean, std=std: (data - mean) / std

# preprocess data on GPU
# train_set = bot.preprocess(dataset_dict['train'], [bot.partial(bot.pad, border=4), bot.transpose, bot.normalise, bot.to(torch.float16)])
valid_set = bot.preprocess(dataset_dict['valid'],
                           [bot.transpose, normalize, bot.to(torch.float16)])
# print('=====> Data preprocessed (on GPU)')

# create batching lambda function
valid_batches = bot.partial(bot.Batches, dataset=valid_set, shuffle=False, drop_last=False)

print('=====> Input whitening')
# create input whitening network
Λ, V = bot.eigens(bot.patches(valid_set['data'][:10000, :, 4:-4, 4:-4]))
input_whitening_net = bot.network(
    conv_pool_block=bot.conv_pool_block_pre,
    prep_block=bot.partial(bot.whitening_block, Λ=Λ, V=V),
    scale=1 / 16,
    types={
        nn.ReLU: bot.partial(nn.CELU, 0.3),
        bot.BatchNorm: bot.partial(bot.GhostBatchNorm, num_splits=16, weight=False),
    })
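# A minimal sketch of the idea behind bot.patches/bot.eigens above (assumed
# semantics, not the actual bot implementation): whitening is derived from the
# eigendecomposition of the second-moment matrix of small image patches.
def patch_covariance_eigens(data, h=3, w=3):
    # data: (N, C, H, W); extract every h x w patch as a flat vector
    c = data.size(1)
    patches = data.unfold(2, h, 1).unfold(3, w, 1)        # (N, C, H', W', h, w)
    patches = patches.permute(0, 2, 3, 1, 4, 5).reshape(-1, c * h * w).float()
    # second-moment matrix of the (already normalized) patches
    cov = patches.t() @ patches / patches.size(0)
    eigenvalues, eigenvectors = torch.linalg.eigh(cov)    # ascending order
    return eigenvalues.flip(0), eigenvectors.flip(1)      # largest first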
# get data statistics for normalizing data
mean = tuple(np.mean(test_dataset.data, axis=(0, 1, 2)))
std = tuple(np.std(test_dataset.data, axis=(0, 1, 2)))
mean, std = [torch.tensor(x, device=device, dtype=torch.float16) for x in (mean, std)]
normalize = lambda data, mean=mean, std=std: (data - mean) / std

# preprocess data on GPU
# train_set = bot.preprocess(dataset_dict['train'], [bot.partial(bot.pad, border=4), bot.transpose, bot.normalise, bot.to(torch.float16)])
valid_set = bot.preprocess(dataset_dict['valid'],
                           [bot.transpose, normalize, bot.to(torch.float16)])

if args.use_subset:
    # use only a subset of the data (10%)
    valid_set['data'], valid_set['targets'] = bot.get_subset(valid_set, 0.1)

print('=====> Data preprocessed (on GPU)')

# create batching lambda function
valid_batches = bot.partial(bot.Batches, dataset=valid_set, shuffle=False, drop_last=False)

print('=====> Input whitening')
# create input whitening network
Λ, V = bot.eigens(bot.patches(valid_set['data'][:10000, :, 4:-4, 4:-4]))
input_whitening_net = bot.network(
    conv_pool_block=bot.conv_pool_block_pre,
    prep_block=bot.partial(bot.whitening_block, Λ=Λ, V=V),
    scale=1 / 16,
    types={
        nn.ReLU: bot.partial(nn.CELU, 0.3),
        bot.BatchNorm: bot.partial(bot.GhostBatchNorm, num_splits=16, weight=False),
    })

print('=====> Building model (with input whitening network)')
net = bot.getResNet8BOT(input_whitening_net)
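# GhostBatchNorm (bound above with num_splits=16) computes batch-norm
# statistics over small "ghost" sub-batches rather than the full batch.
# A minimal sketch of the idea, assuming bot's version behaves like this;
# the real class (including its weight=False option, which freezes the
# affine scale) may differ:
class GhostBatchNormSketch(nn.BatchNorm2d):
    def __init__(self, num_features, num_splits=16, **kwargs):
        super().__init__(num_features, **kwargs)
        self.num_splits = num_splits

    def forward(self, x):
        if self.training:
            # normalize each sub-batch with its own statistics
            chunks = x.chunk(self.num_splits, dim=0)
            normed = [super(GhostBatchNormSketch, self).forward(c) for c in chunks]
            return torch.cat(normed, dim=0)
        return super().forward(x)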