def run_inflater(args):
    """Inflate a pretrained 2D ResNet into an I3ResNet and save its weights.

    args must provide:
        resnet_nb -- backbone depth, one of 50 / 101 / 152.
        frame_nb  -- temporal depth (frames) used for inflation.

    Side effects: reads data/imagenet_class_index.json, downloads pretrained
    ResNet weights if missing, writes the inflated checkpoint to
    'inflated_resnet-50-imagenet.pth'.

    Raises:
        ValueError: if resnet_nb is not one of 50, 101, 152.
    """
    # Standard ImageNet channel normalization.
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    # NOTE(review): `dataset` and `imagenet_classes` are built but never used
    # in this function; kept for parity with the sibling run_inflater.
    dataset = datasets.ImageFolder(
        'data/dummy-dataset',
        transforms.Compose([
            transforms.CenterCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]))

    # Use a context manager so the JSON file handle is closed promptly.
    with open('data/imagenet_class_index.json') as f:
        class_idx = json.load(f)
    imagenet_classes = [class_idx[str(k)][1] for k in range(len(class_idx))]

    if args.resnet_nb == 50:
        resnet = torchvision.models.resnet50(pretrained=True)
    elif args.resnet_nb == 101:
        resnet = torchvision.models.resnet101(pretrained=True)
    elif args.resnet_nb == 152:
        resnet = torchvision.models.resnet152(pretrained=True)
    else:
        # BUG FIX: .format() must be applied to the message string, not to
        # the ValueError instance (the original raised AttributeError).
        raise ValueError(
            'resnet_nb should be in [50|101|152] but got {}'.format(
                args.resnet_nb))

    print("pretrained_resnet")
    print(resnet)
    # Deep-copy so the original 2D network is left untouched by inflation.
    i3resnet = I3ResNet(copy.deepcopy(resnet), args.frame_nb)
    print("resnet is inflated, i3resnet")
    print(i3resnet)
    i3resnet.cuda()
    i3resnet = torch.nn.DataParallel(i3resnet, device_ids=None)

    save_file_path = 'inflated_resnet-50-imagenet.pth'
    states = {
        'epoch': 0,
        'arch': 'resnet-50',
        'state_dict': i3resnet.state_dict(),
    }
    torch.save(states, save_file_path)
# --- Example 2 (pasted-snippet separator) ---
def train(num_epoch=100, root='/home/selfdriving/mrcnn/bdd12k/',
          train_split='/home/selfdriving/I3D/data/bdd12k.json', batch_size=4,
          save_model='models/', frame_nb=64, class_nb=7, resnet_nb=50):
    """Train an inflated 3D ResNet (I3ResNet) for multi-label video tagging.

    Parameters
    ----------
    num_epoch : total number of training epochs.
    root : dataset root directory.
    train_split : path to the JSON split file.
    batch_size : clips per batch.
    save_model : directory/prefix for periodic checkpoints.
    frame_nb : temporal depth (frames per clip) used to inflate the backbone.
    class_nb : number of output classes.
    resnet_nb : 2D backbone depth, one of 50 / 101 / 152.

    Side effects: downloads pretrained ResNet weights if missing; writes
    'net_<epoch>.pth' every 5 epochs and 'net_Final.pth' at the end.
    NOTE(review): validation is toggled by the module-level `args.val` flag
    rather than a parameter — consider promoting it.

    Raises:
        ValueError: if resnet_nb is not one of 50, 101, 152.
    """
    # Spatial augmentation only: random 224x224 crop of each frame.
    transform = transforms.Compose([
        videotransforms.RandomCrop(224)
    ])

    dataset = Dataset(train_split, 'train', root, transform)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batch_size,
                                             shuffle=True,
                                             num_workers=16,
                                             pin_memory=True)

    if args.val:
        # BUG FIX: pass the composed `transform`; the original passed the
        # `transforms` module object by mistake.
        val_dataset = Dataset(train_split, 'val', root, transform)
        val_dataloader = torch.utils.data.DataLoader(val_dataset,
                                                     batch_size=batch_size,
                                                     shuffle=True,
                                                     num_workers=16,
                                                     pin_memory=True)

    # Select the 2D backbone. BUG FIX: honor the `resnet_nb` parameter —
    # the original read module-level `args.resnet_nb` and silently ignored
    # the argument (likewise frame_nb / class_nb below).
    if resnet_nb == 50:
        resnet = torchvision.models.resnet50(pretrained=True)
    elif resnet_nb == 101:
        resnet = torchvision.models.resnet101(pretrained=True)
    elif resnet_nb == 152:
        resnet = torchvision.models.resnet152(pretrained=True)
    else:
        # BUG FIX: format the message string; the original called .format()
        # on the ValueError instance and raised AttributeError instead.
        raise ValueError(
            'resnet_nb should be in [50|101|152] but got {}'.format(resnet_nb))

    # Inflate 2D convolutions to 3D; conv_class=True installs a
    # convolutional classification head sized for `class_nb` outputs.
    i3resnet = I3ResNet(copy.deepcopy(resnet), frame_nb, class_nb,
                        conv_class=True)

    # Set CPU/GPU devices.
    i3resnet.train()
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    i3resnet = i3resnet.to(device)
    i3resnet = nn.DataParallel(i3resnet)  # multiple GPUs

    # Per-class positive weights for the multi-label BCE loss.
    # NOTE(review): length must equal class_nb — TODO confirm for
    # class_nb != 7 before reusing this default.
    class_weights = [0.4, 2, 2, 2, 2, 2, 1]
    w = torch.FloatTensor(class_weights).cuda()
    criterion = nn.BCEWithLogitsLoss(pos_weight=w).cuda()
    optimizer = optim.Adam(i3resnet.parameters(), lr=0.0001, weight_decay=0.001)

    for epoch in range(0, num_epoch):
        print('Epoch {}/{}'.format(epoch, num_epoch))
        print('-' * 10)

        lossArr = []
        AccuracyArr = []

        # Iterate over data.
        for i, data in enumerate(dataloader):
            tic = time.time()
            inputs, labels = data

            # Move to device. Per the original comments, shapes are
            # presumably inputs BxCxTxHxW (4x3x64x224x224) and labels Bx7
            # — TODO confirm against Dataset.
            inputs = Variable(inputs.to(device))
            labels = Variable(labels.to(device))

            optimizer.zero_grad()
            pred = i3resnet(inputs)

            loss = criterion(pred, labels)
            loss.backward()
            optimizer.step()
            # .item() already yields a Python float; the original's extra
            # np.array() wrapper was redundant.
            loss_cpu = loss.item()

            lossArr.append(loss_cpu)
            meanLoss = np.mean(np.array(lossArr))

            # Multi-label accuracy: threshold sigmoid at 0.5, sample-wise F1.
            predict = torch.sigmoid(pred) >= 0.5
            f1 = f1_score(labels.cpu().data.numpy(),
                          predict.cpu().data.numpy(), average='samples')
            AccuracyArr.append(f1)

            if i % 10 == 0:
                toc = time.time()
                print('time elapsed', toc - tic)
                print('prediction logits:{}'.format(predict))

                print('ground truth:{}'.format(labels.cpu().data.numpy()))
                print('Epoch %d Iteration %d: Loss %.5f Accumulated Loss %.5f' % (
                    epoch, i, lossArr[-1], meanLoss))
                print('Epoch %d Iteration %d: F1 %.5f Accumulated F1 %.5f' % (
                    epoch, i, AccuracyArr[-1], np.mean(np.array(AccuracyArr))))

        # Checkpoint every 5 epochs; validate every epoch when enabled.
        if (epoch + 1) % 5 == 0:
            torch.save(i3resnet.state_dict(),
                       (save_model + 'net_%d.pth' % (epoch + 1)))
        if args.val and (epoch + 1) % 1 == 0:
            print("Validation...")
            run_test(val_dataloader, i3resnet, device)

    torch.save(i3resnet.state_dict(), (save_model + 'net_Final.pth'))
# --- Example 3 (pasted-snippet separator) ---
                     index_file=INDEX_FILE,
                     normalize=True,
                     frames=NUM_FRAMES,
                     split_file=SPLIT_FILE)

# Training loader over the dataset constructed above (start of that call is
# outside this snippet); shuffled, with pinned memory for faster host->GPU
# transfer.
dataloader = torch.utils.data.DataLoader(dataset,
                                         batch_size=batch_size,
                                         shuffle=True,
                                         num_workers=10,
                                         pin_memory=True)

# Validation split — presumably Moments in Time, given the MITDataset name;
# uses deterministic test-time transforms (TODO confirm test_transforms).
val_dataset = MITDataset(mode="val",
                         transforms=test_transforms,
                         frames=NUM_FRAMES,
                         normalize=True,
                         index_file=INDEX_FILE,
                         split_file=SPLIT_FILE)

# Validation loader: no shuffling, so evaluation order is reproducible.
val_dataloader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             num_workers=10,
                                             pin_memory=True)

# Multi-label binarizer exposed by the dataset; its classes_ determine the
# size of the classifier head below.
mlb = dataset.mlb
num_classes = len(dataset.mlb.classes_)

# Replace the ImageNet FC head with one sized for num_classes, then inflate
# the 2D ResNet-50 into a 3D I3ResNet over NUM_FRAMES-frame clips.
# copy.deepcopy keeps the original 2D network intact.
resnet = torchvision.models.resnet50(pretrained=True)
resnet.fc = nn.Linear(2048, num_classes)
model = I3ResNet(copy.deepcopy(resnet), NUM_FRAMES)
# --- Example 4 (pasted-snippet separator) ---
def run_inflater(args):
    """Sanity-check ResNet inflation against the original 2D network.

    For each image in the dummy dataset, runs the pretrained 2D ResNet and
    the inflated I3ResNet on a temporally replicated copy of that image,
    prints error statistics, and asserts the two outputs agree to < 1e-4.

    args must provide: resnet_nb, frame_nb, display_samples, top_k.

    Raises:
        ValueError: if resnet_nb is not one of 50, 101, 152.
        AssertionError: if 2D and inflated predictions diverge.
    """
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    dataset = datasets.ImageFolder(
        'data/dummy-dataset',
        transforms.Compose([
            transforms.CenterCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]))

    # Index -> human-readable ImageNet label (used when displaying samples).
    # Context manager ensures the file handle is closed.
    with open('data/imagenet_class_index.json') as f:
        class_idx = json.load(f)
    imagenet_classes = [class_idx[str(k)][1] for k in range(len(class_idx))]

    if args.resnet_nb == 50:
        resnet = torchvision.models.resnet50(pretrained=True)
    elif args.resnet_nb == 101:
        resnet = torchvision.models.resnet101(pretrained=True)
    elif args.resnet_nb == 152:
        resnet = torchvision.models.resnet152(pretrained=True)
    else:
        # BUG FIX: format the message string; the original called .format()
        # on the ValueError instance and raised AttributeError instead.
        raise ValueError(
            'resnet_nb should be in [50|101|152] but got {}'.format(
                args.resnet_nb))

    loader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=False)
    i3resnet = I3ResNet(copy.deepcopy(resnet), args.frame_nb)
    i3resnet.train()
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    i3resnet = i3resnet.to(device)
    resnet = resnet.to(device)

    for i, (input_2d, target) in enumerate(loader):
        target = target.to(device)
        target_var = torch.autograd.Variable(target)
        input_2d_var = torch.autograd.Variable(input_2d.to(device))

        # Reference prediction from the original 2D network.
        out2d = resnet(input_2d_var)
        out2d = out2d.cpu().data

        # Replicate the image along a new temporal axis so the 3D network
        # sees a "video" of frame_nb identical frames.
        input_3d = input_2d.unsqueeze(2).repeat(1, 1, args.frame_nb, 1, 1)
        input_3d_var = torch.autograd.Variable(input_3d.to(device))

        out3d = i3resnet(input_3d_var)
        out3d = out3d.cpu().data

        out_diff = out2d - out3d
        print('mean abs error {}'.format(out_diff.abs().mean()))
        print('mean abs val {}'.format(out2d.abs().mean()))

        # Computing errors between final predictions of inflated and
        # uninflated dense networks.
        print(
            'Batch {i} maximum error between 2d and inflated predictions: {err}'
            .format(i=i, err=out_diff.max()))
        assert (out_diff.max() < 0.0001)

        if args.display_samples:
            for sample_idx in range(out3d.shape[0]):
                sample_out = out3d[sample_idx]

                top_val, top_idx = torch.sort(sample_out, 0, descending=True)

                print('Top {} classes and associated scores: '.format(
                    args.top_k))
                # BUG FIX: use a dedicated loop variable; the original
                # reused `i`, shadowing the enumerate() batch index.
                for rank in range(args.top_k):
                    print('[{}]: {}'.format(imagenet_classes[top_idx[rank]],
                                            top_val[rank]))

                # Min-max normalize for display.
                sample_img = input_2d[sample_idx].numpy().transpose(1, 2, 0)
                sample_img = (sample_img - sample_img.min()) * (
                    1 / (sample_img.max() - sample_img.min()))
                plt.imshow(sample_img)
                plt.show()
# --- Example 5 (pasted-snippet separator) ---
def eval(args):
    """Evaluate a trained I3ResNet checkpoint on the validation split.

    Loads args.model_path (a state dict saved from nn.DataParallel — the
    'module.' prefix is stripped), runs inference over the validation
    DataLoader, and prints sample-averaged and per-class F1 scores.

    args must provide: train_split, root, frame_nb, interval, batch_size,
    resnet_nb, class_nb, model_path.

    NOTE(review): shadows the builtin `eval`; name kept for compatibility.

    Raises:
        ValueError: if resnet_nb is not one of 50, 101, 152.
    """
    transform = transforms.Compose([videotransforms.RandomCrop(224)])

    val_dataset = Dataset(args.train_split, 'val', args.root, args.frame_nb,
                          args.interval, transform)

    val_dataloader = torch.utils.data.DataLoader(
        val_dataset,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=24,  # on jobs
        pin_memory=True)
    if args.resnet_nb == 50:
        resnet = torchvision.models.resnet50(pretrained=True)
        print('load resnet50 pretrained model...')
    elif args.resnet_nb == 101:
        resnet = torchvision.models.resnet101(pretrained=True)
        print('load resnet101 pretrained model...')
    elif args.resnet_nb == 152:
        resnet = torchvision.models.resnet152(pretrained=True)
        print('load resnet152 pretrained model...')
    else:
        # BUG FIX: format the message string; the original called .format()
        # on the ValueError instance and raised AttributeError instead.
        raise ValueError(
            'resnet_nb should be in [50|101|152] but got {}'.format(
                args.resnet_nb))

    i3resnet = I3ResNet(copy.deepcopy(resnet),
                        args.frame_nb,
                        args.class_nb,
                        conv_class=True)

    state_dict = torch.load(args.model_path)

    # Checkpoint was saved from nn.DataParallel: strip the 'module.' prefix
    # from every key so it loads into a bare model.
    new_state_dict = OrderedDict()
    for k, v in state_dict.items():
        name = k[7:]  # remove 'module.'
        new_state_dict[name] = v

    i3resnet.load_state_dict(new_state_dict)
    print('loaded saved state_dict...')

    i3resnet.eval()
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    i3resnet = i3resnet.to(device)

    AccuracyArr = []
    # BUG FIX: collect per-class F1 rows in a list instead of seeding the
    # array with np.zeros — the original's all-zeros first row biased the
    # printed per-class mean toward 0.
    per_class_f1 = []
    with torch.no_grad():
        for i, data in enumerate(val_dataloader):
            tic = time.time()

            img_cpu, label_cpu = data
            img = Variable(img_cpu.to(device))
            label = Variable(label_cpu.to(device))

            pred = i3resnet(img)

            # Multi-label predictions: threshold sigmoid at 0.5.
            predict = torch.sigmoid(pred) > 0.5
            # Sample-averaged F1 plus the per-class (unaveraged) F1 vector.
            f1_sample = f1_score(label_cpu.data.numpy(),
                                 predict.cpu().data.numpy(),
                                 average='samples')
            f1 = f1_score(label_cpu.data.numpy(),
                          predict.cpu().data.numpy(),
                          average=None)

            AccuracyArr.append(f1_sample)
            per_class_f1.append(f1)

            if i % 10 == 0:
                toc = time.time()
                print('validation dataset batch:', i)
                print('prediction logits:{}'.format(
                    predict.cpu().data.numpy()))
                print('ground truth:{}'.format(label_cpu.data.numpy()))
                print('f1 score:', f1_sample, 'accumulated f1 score:',
                      np.mean(np.array(AccuracyArr)))
                print('f1 average:', np.mean(np.vstack(per_class_f1), axis=0))
                print('Time elapsed:', toc - tic)

            # Free cached GPU memory between batches.
            torch.cuda.empty_cache()

    print("Finished Validation")