def run_inflater(args):
    """Inflate a pretrained 2D ResNet into an I3ResNet and save a checkpoint.

    Args:
        args: namespace with ``resnet_nb`` (one of 50, 101, 152) selecting
            the ImageNet-pretrained backbone, and ``frame_nb``, the number of
            temporal frames used to inflate the 2D filters.

    Raises:
        ValueError: if ``args.resnet_nb`` is not one of 50, 101, 152.
    """
    # NOTE(review): dataset and imagenet_classes are built but unused in this
    # variant; kept because constructing them validates the data files exist.
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    dataset = datasets.ImageFolder(
        'data/dummy-dataset',
        transforms.Compose([
            transforms.CenterCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]))
    class_idx = json.load(open('data/imagenet_class_index.json'))
    imagenet_classes = [class_idx[str(k)][1] for k in range(len(class_idx))]
    if args.resnet_nb == 50:
        resnet = torchvision.models.resnet50(pretrained=True)
    elif args.resnet_nb == 101:
        resnet = torchvision.models.resnet101(pretrained=True)
    elif args.resnet_nb == 152:
        resnet = torchvision.models.resnet152(pretrained=True)
    else:
        # BUG FIX: .format() was previously called on the ValueError instance
        # instead of the message string, so the placeholder never got filled.
        raise ValueError(
            'resnet_nb should be in [50|101|152] but got {}'.format(
                args.resnet_nb))
    print("pretrained_resnet")
    print(resnet)
    i3resnet = I3ResNet(copy.deepcopy(resnet), args.frame_nb)
    print("resnet is inflated, i3resnet")
    print(i3resnet)
    i3resnet.cuda()
    i3resnet = torch.nn.DataParallel(i3resnet, device_ids=None)
    # BUG FIX: checkpoint name and 'arch' were hard-coded to resnet-50 even
    # when a 101/152 backbone was inflated; derive both from args.resnet_nb.
    save_file_path = 'inflated_resnet-{}-imagenet.pth'.format(args.resnet_nb)
    states = {
        'epoch': 0,
        'arch': 'resnet-{}'.format(args.resnet_nb),
        'state_dict': i3resnet.state_dict(),
    }
    torch.save(states, save_file_path)
def train(num_epoch=100, root='/home/selfdriving/mrcnn/bdd12k/',
          train_split='/home/selfdriving/I3D/data/bdd12k.json', batch_size=4,
          save_model='models/', frame_nb=64, class_nb=7, resnet_nb=50):
    """Train an inflated 3D ResNet for multi-label action classification.

    Args:
        num_epoch: total number of training epochs.
        root: root directory of the video/frame dataset.
        train_split: path to the JSON split file.
        batch_size: samples per batch.
        save_model: directory prefix for checkpoint files.
        frame_nb: temporal frames per clip fed to the inflated network.
        class_nb: number of output classes.
        resnet_nb: backbone depth, one of 50, 101, 152.

    Raises:
        ValueError: if ``resnet_nb`` is not one of 50, 101, 152.
    """
    # --- dataset setup ---
    transform = transforms.Compose([videotransforms.RandomCrop(224)])
    dataset = Dataset(train_split, 'train', root, transform)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batch_size,
                                             shuffle=True,
                                             num_workers=16,
                                             pin_memory=True)
    if args.val:
        # BUG FIX: the 'transforms' module object was passed here instead of
        # the composed 'transform' pipeline built above.
        val_dataset = Dataset(train_split, 'val', root, transform)
        val_dataloader = torch.utils.data.DataLoader(val_dataset,
                                                     batch_size=batch_size,
                                                     shuffle=True,
                                                     num_workers=16,
                                                     pin_memory=True)

    # --- model setup ---
    # BUG FIX: the resnet_nb/frame_nb/class_nb parameters were ignored in
    # favor of the global 'args', so callers could never configure the model;
    # use the function's own parameters.
    if resnet_nb == 50:
        resnet = torchvision.models.resnet50(pretrained=True)
    elif resnet_nb == 101:
        resnet = torchvision.models.resnet101(pretrained=True)
    elif resnet_nb == 152:
        resnet = torchvision.models.resnet152(pretrained=True)
    else:
        # BUG FIX: .format() was called on the ValueError instance instead
        # of the message string.
        raise ValueError(
            'resnet_nb should be in [50|101|152] but got {}'.format(resnet_nb))
    i3resnet = I3ResNet(copy.deepcopy(resnet), frame_nb, class_nb,
                        conv_class=True)

    # --- devices ---
    i3resnet.train()
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    i3resnet = i3resnet.to(device)
    i3resnet = nn.DataParallel(i3resnet)  # multiple GPUs

    # Per-class positive weights to counter label imbalance.
    class_weights = [0.4, 2, 2, 2, 2, 2, 1]
    w = torch.FloatTensor(class_weights).cuda()
    criterion = nn.BCEWithLogitsLoss(pos_weight=w).cuda()
    optimizer = optim.Adam(i3resnet.parameters(), lr=0.0001,
                           weight_decay=0.001)

    # --- training loop ---
    for epoch in range(0, num_epoch):
        print('Epoch {}/{}'.format(epoch, num_epoch))
        print('-' * 10)
        lossArr = []
        AccuracyArr = []
        for i, data in enumerate(dataloader):
            tic = time.time()
            inputs, labels = data
            inputs = Variable(inputs.to(device))  # 4x3x64x224x224
            labels = Variable(labels.to(device))  # 4x7
            optimizer.zero_grad()
            pred = i3resnet(inputs)  # 4x7
            loss = criterion(pred, labels)
            loss.backward()
            optimizer.step()
            loss_cpu = np.array(loss.cpu().data.item())
            lossArr.append(loss_cpu)
            meanLoss = np.mean(np.array(lossArr))
            # Multi-label accuracy: threshold sigmoid outputs at 0.5 and
            # score with sample-averaged F1.
            predict = torch.sigmoid(pred) >= 0.5
            f1 = f1_score(labels.cpu().data.numpy(),
                          predict.cpu().data.numpy(),
                          average='samples')
            AccuracyArr.append(f1)
            if i % 10 == 0:
                toc = time.time()
                print('time elapsed', toc - tic)
                # BUG FIX: the printed tensor holds thresholded boolean
                # predictions, not raw logits; label it accordingly.
                print('prediction:{}'.format(predict))
                print('ground truth:{}'.format(labels.cpu().data.numpy()))
                print('Epoch %d Iteration %d: Loss %.5f Accumulated Loss %.5f'
                      % (epoch, i, lossArr[-1], meanLoss))
                print('Epoch %d Iteration %d: F1 %.5f Accumulated F1 %.5f' % (
                    epoch, i, AccuracyArr[-1],
                    np.mean(np.array(AccuracyArr))))
        # Checkpoint every 5 epochs.
        if (epoch + 1) % 5 == 0:
            torch.save(i3resnet.state_dict(),
                       (save_model + 'net_%d.pth' % (epoch + 1)))
        # Validate every epoch when requested.
        if args.val and (epoch + 1) % 1 == 0:
            print("Validation...")
            run_test(val_dataloader, i3resnet, device)
    torch.save(i3resnet.state_dict(), (save_model + 'net_Final.pth'))
index_file=INDEX_FILE, normalize=True, frames=NUM_FRAMES, split_file=SPLIT_FILE) dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=10, pin_memory=True) val_dataset = MITDataset(mode="val", transforms=test_transforms, frames=NUM_FRAMES, normalize=True, index_file=INDEX_FILE, split_file=SPLIT_FILE) val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=10, pin_memory=True) mlb = dataset.mlb num_classes = len(dataset.mlb.classes_) resnet = torchvision.models.resnet50(pretrained=True) resnet.fc = nn.Linear(2048, num_classes) model = I3ResNet(copy.deepcopy(resnet), NUM_FRAMES)
def run_inflater(args):
    """Sanity-check an inflated 3D ResNet against its 2D source network.

    Each 2D image is repeated ``args.frame_nb`` times along the temporal
    axis; the inflated network must then reproduce the 2D network's
    predictions (asserted to within 1e-4).

    Args:
        args: namespace with ``resnet_nb`` (50|101|152), ``frame_nb``,
            ``display_samples`` (bool) and ``top_k`` (int).

    Raises:
        ValueError: if ``args.resnet_nb`` is not one of 50, 101, 152.
        AssertionError: if 2D and inflated predictions diverge.
    """
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    dataset = datasets.ImageFolder(
        'data/dummy-dataset',
        transforms.Compose([
            transforms.CenterCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]))
    class_idx = json.load(open('data/imagenet_class_index.json'))
    imagenet_classes = [class_idx[str(k)][1] for k in range(len(class_idx))]
    if args.resnet_nb == 50:
        resnet = torchvision.models.resnet50(pretrained=True)
    elif args.resnet_nb == 101:
        resnet = torchvision.models.resnet101(pretrained=True)
    elif args.resnet_nb == 152:
        resnet = torchvision.models.resnet152(pretrained=True)
    else:
        # BUG FIX: .format() was called on the ValueError instance instead
        # of the message string, leaving the placeholder unfilled.
        raise ValueError(
            'resnet_nb should be in [50|101|152] but got {}'.format(
                args.resnet_nb))
    loader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=False)
    i3resnet = I3ResNet(copy.deepcopy(resnet), args.frame_nb)
    i3resnet.train()
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    i3resnet = i3resnet.to(device)
    resnet = resnet.to(device)
    for i, (input_2d, target) in enumerate(loader):
        target = target.to(device)
        target_var = torch.autograd.Variable(target)
        input_2d_var = torch.autograd.Variable(input_2d.to(device))
        out2d = resnet(input_2d_var)
        out2d = out2d.cpu().data
        # Tile the still image along a new temporal dimension so the 3D
        # network sees a "video" of identical frames.
        input_3d = input_2d.unsqueeze(2).repeat(1, 1, args.frame_nb, 1, 1)
        input_3d_var = torch.autograd.Variable(input_3d.to(device))
        out3d = i3resnet(input_3d_var)
        out3d = out3d.cpu().data
        out_diff = out2d - out3d
        print('mean abs error {}'.format(out_diff.abs().mean()))
        print('mean abs val {}'.format(out2d.abs().mean()))
        # Computing errors between final predictions of inflated and
        # uninflated dense networks
        print(
            'Batch {i} maximum error between 2d and inflated predictions: {err}'
            .format(i=i, err=out_diff.max()))
        assert (out_diff.max() < 0.0001)
        if args.display_samples:
            max_vals, max_indexes = out3d.max(1)
            for sample_idx in range(out3d.shape[0]):
                sample_out = out3d[sample_idx]
                top_val, top_idx = torch.sort(sample_out, 0, descending=True)
                print('Top {} classes and associated scores: '.format(
                    args.top_k))
                # BUG FIX: the inner loop previously reused 'i', shadowing
                # and clobbering the outer batch index.
                for rank in range(args.top_k):
                    print('[{}]: {}'.format(imagenet_classes[top_idx[rank]],
                                            top_val[rank]))
                # Min-max normalize the image into [0, 1] for display.
                sample_img = input_2d[sample_idx].numpy().transpose(1, 2, 0)
                sample_img = (sample_img - sample_img.min()) * (
                    1 / (sample_img.max() - sample_img.min()))
                plt.imshow(sample_img)
                plt.show()
def eval(args):
    """Evaluate a saved I3ResNet checkpoint on the validation split.

    NOTE(review): the name shadows the builtin ``eval``; kept unchanged so
    existing callers keep working.

    Args:
        args: namespace with ``train_split``, ``root``, ``frame_nb``,
            ``interval``, ``batch_size``, ``resnet_nb`` (50|101|152),
            ``class_nb`` and ``model_path``.

    Raises:
        ValueError: if ``args.resnet_nb`` is not one of 50, 101, 152.
    """
    transform = transforms.Compose([videotransforms.RandomCrop(224)])
    val_dataset = Dataset(args.train_split, 'val', args.root, args.frame_nb,
                          args.interval, transform)
    val_dataloader = torch.utils.data.DataLoader(
        val_dataset,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=24,  # on jobs
        pin_memory=True)
    if args.resnet_nb == 50:
        resnet = torchvision.models.resnet50(pretrained=True)
        print('load resnet50 pretrained model...')
    elif args.resnet_nb == 101:
        resnet = torchvision.models.resnet101(pretrained=True)
        print('load resnet101 pretrained model...')
    elif args.resnet_nb == 152:
        resnet = torchvision.models.resnet152(pretrained=True)
        print('load resnet152 pretrained model...')
    else:
        # BUG FIX: .format() was called on the ValueError instance instead
        # of the message string, leaving the placeholder unfilled.
        raise ValueError(
            'resnet_nb should be in [50|101|152] but got {}'.format(
                args.resnet_nb))
    i3resnet = I3ResNet(copy.deepcopy(resnet), args.frame_nb, args.class_nb,
                        conv_class=True)
    # The checkpoint was saved from a DataParallel wrapper, so every key
    # carries a 'module.' prefix that must be stripped before loading.
    state_dict = torch.load(args.model_path)
    new_state_dict = OrderedDict()
    for k, v in state_dict.items():
        name = k[7:]  # remove 'module.'
        new_state_dict[name] = v
    i3resnet.load_state_dict(new_state_dict)
    print('loaded saved state_dict...')
    i3resnet.eval()
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    i3resnet = i3resnet.to(device)
    AccuracyArr = []
    # BUG FIX: the per-class accumulator was seeded with an all-zeros row,
    # which would skew any per-class average; start with zero rows instead.
    accuracy = np.zeros((0, args.class_nb))
    with torch.no_grad():
        for i, data in enumerate(val_dataloader):
            tic = time.time()
            img_cpu, label_cpu = data
            img = Variable(img_cpu.to(device))
            pred = i3resnet(img)
            # Multi-label accuracy: threshold sigmoid outputs at 0.5.
            predict = torch.sigmoid(pred) > 0.5
            f1_sample = f1_score(label_cpu.data.numpy(),
                                 predict.cpu().data.numpy(),
                                 average='samples')
            f1 = f1_score(label_cpu.data.numpy(),
                          predict.cpu().data.numpy(),
                          average=None)
            AccuracyArr.append(f1_sample)
            accuracy = np.vstack((accuracy, f1))
            if i % 10 == 0:
                toc = time.time()
                print('validation dataset batch:', i)
                # BUG FIX: the printed tensor holds thresholded boolean
                # predictions, not raw logits; label it accordingly.
                print('prediction:{}'.format(predict.cpu().data.numpy()))
                print('ground truth:{}'.format(label_cpu.data.numpy()))
                print('f1 score:', f1_sample, 'accumulated f1 score:',
                      np.mean(np.array(AccuracyArr)))
                print('Time elapsed:', toc - tic)
            torch.cuda.empty_cache()
    print("Finished Validation")