def get(cls, args):
    """Build the train, validation and validation-video datasets.

    ``cls`` is called once per split as
    ``cls(args, args.data, split, list_file, args.cache, transform=...,
    input_size=args.input_size)``.

    * ``'train'`` uses ``args.train_file`` with a random crop of
      ``args.input_size`` followed by a random horizontal flip.
    * ``'val'`` and ``'val_video'`` both use ``args.val_file`` with a fixed
      centre crop of 256.

    Returns:
        tuple: ``(train_dataset, val_dataset, valvideo_dataset)``.
    """
    train_file = args.train_file
    val_file = args.val_file

    def build(split, list_file, steps):
        # All three datasets share everything except the split name, the
        # file list and the transform pipeline.
        return cls(args, args.data, split, list_file, args.cache,
                   transform=transforms.Compose(steps),
                   input_size=args.input_size)

    train_dataset = build('train', train_file, [
        videotransforms.RandomCrop(args.input_size),
        videotransforms.RandomHorizontalFlip(),
    ])
    # NOTE(review): the evaluation crop is fixed at 256 rather than derived
    # from args.input_size -- confirm this is intended.
    val_dataset = build('val', val_file,
                        [videotransforms.CenterCrop(256)])
    valvideo_dataset = build('val_video', val_file,
                             [videotransforms.CenterCrop(256)])
    return train_dataset, val_dataset, valvideo_dataset
Ejemplo n.º 2
0
def main(DATASET,
         LABELS,
         CLASS_IDS,
         BATCH_SIZE,
         ANNOTATION_FILE,
         SEQ_SIZE=16,
         STEP=16,
         nstrokes=-1,
         N_EPOCHS=25,
         base_name=""):
    '''
    Train a Conv3D encoder/decoder pair on cricket stroke clips, save and
    reload its checkpoint, and write the predictions for the validation
    split to ``pred_dict.pkl`` inside ``base_name``.

    Parameters:
    -----------
    DATASET : str
        path to the video dataset
    LABELS : str
        path containing stroke labels
    CLASS_IDS : str
        path to txt file defining classes, similar to THUMOS
    BATCH_SIZE : int
        size for batch of clips
    ANNOTATION_FILE : str
        file handed to ``attn_utils.get_cluster_labels`` to obtain the
        label keys / values
    SEQ_SIZE : int
        no. of frames in a clip (min. 16 for 3D CNN extraction)
    STEP : int
        stride for next example. If SEQ_SIZE=16, STEP=8, use frames (0, 15), (8, 23) ...
    nstrokes : int
        not used by this function; kept for interface compatibility
    N_EPOCHS : int
        number of training epochs, also passed to the checkpoint helpers
    base_name : str
        directory for the cached sample weights, the checkpoint and the
        prediction dictionary. It is created when missing. An empty string
        skips directory creation, so the files this function writes itself
        are then relative to the current working directory.

    Returns:
    --------
    encoder, decoder

    '''
    ###########################################################################
    # seed everything
    seed = 1234
    attn_utils.seed_everything(seed)
    # os.makedirs("") raises FileNotFoundError, so only create the output
    # directory when a non-empty path was given (the default is "").
    if base_name:
        os.makedirs(base_name, exist_ok=True)

    # Read the strokes
    # Divide the highlight dataset files into training, validation and test sets
    # (the test list is not used here).
    train_lst, val_lst, _ = autoenc_utils.split_dataset_files(DATASET)
    print("No. of training videos : {}".format(len(train_lst)))

    ###########################################################################
    # Create a Dataset
    # Clip level transform. Use this with framewiseTransform flag turned off
    train_transform = transforms.Compose([
        videotransforms.RandomCrop(224),
        videotransforms.ToPILClip(),
        videotransforms.Resize((112, 112)),
        videotransforms.ToTensor(),
        videotransforms.Normalize(),
    ])
    test_transform = transforms.Compose([
        videotransforms.CenterCrop(224),
        videotransforms.ToPILClip(),
        videotransforms.Resize((112, 112)),
        videotransforms.ToTensor(),
        videotransforms.Normalize(),
    ])
    train_dataset = CricketStrokesDataset(train_lst,
                                          DATASET,
                                          LABELS,
                                          CLASS_IDS,
                                          frames_per_clip=SEQ_SIZE,
                                          step_between_clips=STEP,
                                          train=True,
                                          framewiseTransform=False,
                                          transform=train_transform)
    val_dataset = CricketStrokesDataset(val_lst,
                                        DATASET,
                                        LABELS,
                                        CLASS_IDS,
                                        frames_per_clip=SEQ_SIZE,
                                        step_between_clips=STEP,
                                        train=False,
                                        framewiseTransform=False,
                                        transform=test_transform)

    ###########################################################################

    labs_keys, labs_values = attn_utils.get_cluster_labels(ANNOTATION_FILE)

    num_classes = len(list(set(labs_values)))

    # Weighted sampler for class imbalance. The per-sample weights are cached
    # on disk, keyed by the number of classes and the training-set size.
    weights_path = os.path.join(
        base_name, "weights_c" + str(num_classes) + "_" +
        str(len(train_dataset)) + ".pkl")
    if not os.path.isfile(weights_path):
        samples_weight = attn_utils.get_sample_weights(train_dataset,
                                                       labs_keys, labs_values,
                                                       train_lst)
        with open(weights_path, "wb") as fp:
            pickle.dump(samples_weight, fp)
    # NOTE: pickle.load must only ever see this locally written cache file.
    with open(weights_path, "rb") as fp:
        samples_weight = pickle.load(fp)
    sampler = WeightedRandomSampler(samples_weight, len(samples_weight))

    # Seed NumPy in this process before building the loader. Writing
    # `worker_init_fn=np.random.seed(12)` would run the seed call right here
    # and hand DataLoader None, so the seed is set explicitly and no worker
    # hook is passed. A real per-worker hook only matters once
    # num_workers > 0.
    np.random.seed(12)
    train_loader = DataLoader(dataset=train_dataset,
                              batch_size=BATCH_SIZE,
                              sampler=sampler)

    val_loader = DataLoader(dataset=val_dataset,
                            batch_size=BATCH_SIZE,
                            shuffle=False)

    # The "test" phase is served by the validation split.
    data_loaders = {"train": train_loader, "test": val_loader}

    ###########################################################################
    # load model and set loss function
    encoder = conv_attn_model.Conv3DEncoder(HIDDEN_SIZE, 1, bidirectional)
    decoder = conv_attn_model.Conv3DDecoder(HIDDEN_SIZE, num_classes, 1, 1,
                                            bidirectional)
    encoder = encoder.to(device)
    decoder = decoder.to(device)

    # Setup the loss fxn
    criterion = nn.CrossEntropyLoss()

    # List the trainable parameters (informational only; the optimizers
    # below are given every parameter of each module).
    print("Params to learn:")
    for name, param in encoder.named_parameters():
        if param.requires_grad:
            print("Encoder : {}".format(name))
    for name, param in decoder.named_parameters():
        if param.requires_grad:
            print("Decoder : {}".format(name))

    encoder_optimizer = torch.optim.SGD(encoder.parameters(),
                                        lr=0.01,
                                        momentum=0.9)
    decoder_optimizer = torch.optim.SGD(decoder.parameters(),
                                        lr=0.01,
                                        momentum=0.9)

    # Decay LR by a factor of 0.1 every 10 epochs.
    # NOTE(review): the scheduler wraps only the encoder optimizer, so the
    # decoder learning rate is not decayed by it -- confirm this is intended.
    lr_scheduler = StepLR(encoder_optimizer, step_size=10, gamma=0.1)

    ###########################################################################
    # Training the model
    start = time.time()

    (encoder, decoder) = train_model(encoder,
                                     decoder,
                                     data_loaders,
                                     criterion,
                                     encoder_optimizer,
                                     decoder_optimizer,
                                     lr_scheduler,
                                     labs_keys,
                                     labs_values,
                                     num_epochs=N_EPOCHS)

    end = time.time()

    # Save the models returned by train_model, then read the checkpoint back
    # so the prediction below runs on the saved weights.
    attn_utils.save_attn_model_checkpoint(base_name, (encoder, decoder),
                                          N_EPOCHS, "SGD")
    encoder, decoder = attn_utils.load_attn_model_checkpoint(
        base_name, encoder, decoder, N_EPOCHS, "SGD")

    print("Total Execution time for {} epoch : {}".format(
        N_EPOCHS, (end - start)))

    ###########################################################################

    print("Writing prediction dictionary....")
    # predict also returns an accuracy value, which is not used here.
    pred_out_dict, _ = predict(encoder,
                               decoder,
                               data_loaders,
                               criterion,
                               labs_keys,
                               labs_values,
                               phase='test')

    with open(os.path.join(base_name, "pred_dict.pkl"), "wb") as fp:
        pickle.dump(pred_out_dict, fp)

    # Report model sizes
    print("#Parameters Encoder : {} ".format(
        autoenc_utils.count_parameters(encoder)))
    print("#Parameters Decoder : {} ".format(
        autoenc_utils.count_parameters(decoder)))

    return encoder, decoder
Ejemplo n.º 3
0
def main(DATASET,
         LABELS,
         CLASS_IDS,
         BATCH_SIZE,
         ANNOTATION_FILE,
         SEQ_SIZE=16,
         STEP=16,
         nstrokes=-1,
         N_EPOCHS=25,
         base_name=""):
    '''
    Train a C3D + GRU classifier on cricket stroke clips, starting from
    pretrained C3D weights, then save and reload its checkpoint and run
    prediction on the validation split.

    Parameters:
    -----------
    DATASET : str
        path to the video dataset
    LABELS : str
        path containing stroke labels
    CLASS_IDS : str
        path to txt file defining classes, similar to THUMOS
    BATCH_SIZE : int
        size for batch of clips
    ANNOTATION_FILE : str
        file handed to ``attn_utils.get_cluster_labels`` to obtain the
        label keys / values
    SEQ_SIZE : int
        no. of frames in a clip (min. 16 for 3D CNN extraction)
    STEP : int
        stride for next example. If SEQ_SIZE=16, STEP=8, use frames (0, 15), (8, 23) ...
    nstrokes : int
        not used by this function; kept for interface compatibility
    N_EPOCHS : int
        number of training epochs, also passed to the checkpoint helpers
    base_name : str
        directory for the cached sample weights and the checkpoint. It is
        created when missing. An empty string skips directory creation, so
        the cache file this function writes itself is then relative to the
        current working directory.

    Returns:
    --------
    model

    '''
    # os.makedirs("") raises FileNotFoundError, so only create the output
    # directory when a non-empty path was given (the default is "").
    if base_name:
        os.makedirs(base_name, exist_ok=True)
    seed = 1234
    attn_utils.seed_everything(seed)
    ###########################################################################
    # Read the strokes
    # Divide the highlight dataset files into training, validation and test sets
    # (the test list is not used here).
    train_lst, val_lst, _ = autoenc_utils.split_dataset_files(DATASET)
    print("No. of training videos : {}".format(len(train_lst)))

    ###########################################################################
    # Create a Dataset
    # Clip level transform. Use this with framewiseTransform flag turned off
    train_transforms = transforms.Compose([
        videotransforms.RandomCrop(300),
        videotransforms.ToPILClip(),
        videotransforms.Resize((112, 112)),
        videotransforms.ToTensor(),
        videotransforms.Normalize(),
    ])
    test_transforms = transforms.Compose([
        videotransforms.CenterCrop(300),
        videotransforms.ToPILClip(),
        videotransforms.Resize((112, 112)),
        videotransforms.ToTensor(),
        videotransforms.Normalize(),
    ])
    train_dataset = CricketStrokesDataset(train_lst,
                                          DATASET,
                                          LABELS,
                                          CLASS_IDS,
                                          frames_per_clip=SEQ_SIZE,
                                          step_between_clips=STEP,
                                          train=True,
                                          framewiseTransform=False,
                                          transform=train_transforms)
    val_dataset = CricketStrokesDataset(val_lst,
                                        DATASET,
                                        LABELS,
                                        CLASS_IDS,
                                        frames_per_clip=SEQ_SIZE,
                                        step_between_clips=STEP,
                                        train=False,
                                        framewiseTransform=False,
                                        transform=test_transforms)

    ###########################################################################

    labs_keys, labs_values = attn_utils.get_cluster_labels(ANNOTATION_FILE)

    num_classes = len(list(set(labs_values)))

    # Weighted sampler for class imbalance. The per-sample weights are cached
    # on disk, keyed by the number of classes and the training-set size.
    weights_path = os.path.join(
        base_name, "weights_c" + str(num_classes) + "_" +
        str(len(train_dataset)) + ".pkl")
    if not os.path.isfile(weights_path):
        samples_weight = attn_utils.get_sample_weights(train_dataset,
                                                       labs_keys, labs_values,
                                                       train_lst)
        with open(weights_path, "wb") as fp:
            pickle.dump(samples_weight, fp)
    # NOTE: pickle.load must only ever see this locally written cache file.
    with open(weights_path, "rb") as fp:
        samples_weight = pickle.load(fp)
    sampler = WeightedRandomSampler(samples_weight, len(samples_weight))

    # Seed NumPy in this process before building the loader. Writing
    # `worker_init_fn=np.random.seed(12)` would run the seed call right here
    # and hand DataLoader None, so the seed is set explicitly and no worker
    # hook is passed. A real per-worker hook only matters once
    # num_workers > 0.
    np.random.seed(12)
    train_loader = DataLoader(dataset=train_dataset,
                              batch_size=BATCH_SIZE,
                              sampler=sampler)

    val_loader = DataLoader(dataset=val_dataset,
                            batch_size=BATCH_SIZE,
                            shuffle=False)

    # The "test" phase is served by the validation split.
    data_loaders = {"train": train_loader, "test": val_loader}

    ###########################################################################
    # load model and set loss function
    model = conv_attn_model.C3DGRUv2Orig(HIDDEN_SIZE, 1, num_classes,
                                         bidirectional)
    # Initialise from a pretrained C3D network.
    model_pretrained = c3d.C3D()
    model_pretrained.load_state_dict(
        torch.load("../localization_rnn/" + wts_path))
    copy_pretrained_weights(model_pretrained, model)
    model = model.to(device)

    # Setup the loss fxn
    criterion = nn.CrossEntropyLoss()

    # Collect (and list) the parameters that will be optimised.
    print("Params to learn:")
    params_to_update = []
    for name, param in model.named_parameters():
        if param.requires_grad:
            params_to_update.append(param)
            print("\t {}".format(name))

    optimizer_ft = optim.SGD(params_to_update, lr=0.001, momentum=0.9)

    # Decay LR by a factor of 0.1 every 30 epochs
    lr_scheduler = StepLR(optimizer_ft, step_size=30, gamma=0.1)

    ###########################################################################
    # Training the model
    start = time.time()

    model = train_model(model,
                        data_loaders,
                        criterion,
                        optimizer_ft,
                        lr_scheduler,
                        labs_keys,
                        labs_values,
                        num_epochs=N_EPOCHS)

    end = time.time()

    # Save the model returned by train_model, then read the checkpoint back
    # so the prediction below runs on the saved weights.
    attn_utils.save_model_checkpoint(base_name, model, N_EPOCHS,
                                     "SGD_c8_c3dgruEp60Step30")
    model = attn_utils.load_weights(base_name, model, N_EPOCHS,
                                    "SGD_c8_c3dgruEp60Step30")

    print("Total Execution time for {} epoch : {}".format(
        N_EPOCHS, (end - start)))

    ###########################################################################

    print("Predicting ...")
    # The value returned by predict is not used further.
    predict(model, data_loaders, labs_keys, labs_values, phase='test')

    print("#Parameters : {} ".format(autoenc_utils.count_parameters(model)))

    return model
Ejemplo n.º 4
0
def run(init_lr=0.1, max_steps=64e3, mode='rgb', root='', batch_size=32, save_model='i3dIter1k_'):
    """Restore a fine-tuned I3D checkpoint for HMDB51 and run prediction.

    The training loop is disabled in this version: the function builds the
    data loaders and the model, loads the checkpoint named
    ``save_model + "000030.pt"`` from ``log_path`` and calls ``predict`` on
    the 'test' loader.

    Args:
        init_lr: learning rate given to the SGD optimizer.
        max_steps: not used by this function.
        mode: ``'flow'`` selects the 2-channel flow network and weights; any
            other value selects the 3-channel RGB network and weights.
        root: not used by this function.
        batch_size: batch size for both data loaders.
        save_model: filename prefix of the checkpoint to load.
    """
    num_epochs = 30
    seed_everything()
    if not os.path.isdir(log_path):
        os.makedirs(log_path)

    # ---- data ---------------------------------------------------------------
    # Clip-level pipelines; they differ only in the cropping strategy.
    train_transforms = transforms.Compose([
        T.RandomCrop(224),
        T.ToPILClip(),
        T.Resize((224, 224)),
        T.ToTensor(),
        T.Normalize(),
    ])
    test_transforms = transforms.Compose([
        T.CenterCrop(224),
        T.ToPILClip(),
        T.Resize((224, 224)),
        T.ToTensor(),
        T.Normalize(),
    ])

    train_set = HMDB51(DATASET, LABELS, 16, step_between_clips=1,
                       fold=1, train=True, transform=train_transforms)
    train_loader = torch.utils.data.DataLoader(
        train_set, batch_size=batch_size, shuffle=True)
    val_set = HMDB51(DATASET, LABELS, 16, step_between_clips=1,
                     fold=1, train=False, transform=test_transforms)
    val_loader = torch.utils.data.DataLoader(
        val_set, batch_size=batch_size, shuffle=False)

    dataloaders = {'train': train_loader, 'test': val_loader}

    # ---- model --------------------------------------------------------------
    # Pick the input channel count and the pretrained weights for the stream.
    if mode == 'flow':
        in_channels = 2
        pretrained = '/home/arpan/VisionWorkspace/pytorch-i3d/models/flow_imagenet.pt'
    else:
        in_channels = 3
        pretrained = '/home/arpan/VisionWorkspace/pytorch-i3d/models/rgb_imagenet.pt'
    i3d = InceptionI3d(400, in_channels=in_channels)
    i3d.load_state_dict(torch.load(pretrained))
    # Swap the 400-way head for a 51-way one.
    i3d.replace_logits(51)
    i3d = i3d.to(device)

    # The optimizer and scheduler are not used while training is disabled;
    # their construction is kept as-is.
    optimizer = optim.SGD(i3d.parameters(), lr=init_lr, momentum=0.9,
                          weight_decay=0.0000001)
    # Decay LR by a factor of 0.1 every 10 epochs
    lr_sched = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

    # ---- restore instead of train -------------------------------------------
    # NOTE: the disabled training loop iterated 'train' and 'test' phases per
    # epoch with a cross-entropy loss, stepped the optimizer per batch and
    # the scheduler per epoch, and wrote a checkpoint to
    # log_path/<save_model><epoch, zero-padded to 6>.pt every 10 epochs. The
    # file loaded below follows that naming for epoch `num_epochs`.
    start = time.time()

    checkpoint_name = save_model + str(num_epochs).zfill(6) + '.pt'
    i3d.load_state_dict(torch.load(os.path.join(log_path, checkpoint_name)))

    end = time.time()
    # With training disabled this only times the checkpoint load.
    print("Total Execution time for {} epoch : {}".format(num_epochs, (end-start)))

    ###########################################################################

    # Predictions

    predict(i3d, dataloaders, 16, 'test')