def BSN_Train_TEM(opt):
    """Train the TEM model, evaluating on the validation subset every epoch.

    Args:
        opt: Option mapping. This function reads "tem_training_lr",
            "tem_weight_decay", "tem_step_size", "tem_step_gamma" and
            "tem_epoch" directly; the same mapping is also forwarded to
            ``TEM``, ``VideoDataSet``, ``train_TEM`` and ``test_TEM``.
    """
    writer = SummaryWriter()
    try:
        model = TEM(opt)
        model = torch.nn.DataParallel(model, device_ids=GPU_IDs).cuda()

        optimizer = optim.Adam(model.parameters(),
                               lr=opt["tem_training_lr"],
                               weight_decay=opt["tem_weight_decay"])

        train_loader = torch.utils.data.DataLoader(
            VideoDataSet(opt, subset="train"),
            batch_size=model.module.batch_size,
            shuffle=True,
            num_workers=8,
            pin_memory=True,
            drop_last=True)

        # NOTE(review): drop_last=True also discards the final partial
        # validation batch -- confirm that this is intended.
        test_loader = torch.utils.data.DataLoader(
            VideoDataSet(opt, subset="validation"),
            batch_size=model.module.batch_size,
            shuffle=False,
            num_workers=8,
            pin_memory=True,
            drop_last=True)

        scheduler = torch.optim.lr_scheduler.StepLR(
            optimizer,
            step_size=opt["tem_step_size"],
            gamma=opt["tem_step_gamma"])

        for epoch in range(opt["tem_epoch"]):
            train_TEM(train_loader, model, optimizer, epoch, writer, opt)
            # Step the schedule only after the epoch's optimizer updates;
            # stepping first skips the initial learning-rate value on
            # PyTorch >= 1.1.
            scheduler.step()
            test_TEM(test_loader, model, epoch, writer, opt)
    finally:
        # Close the summary writer even if training aborts.
        writer.close()
# Example #2
# 0
def BSN_inference_TEM(opt):
    """Run the best TEM checkpoint over the "full" subset and dump CSVs.

    One CSV per video is written to ``./output/TEM_results/`` with the
    columns action, start, end, xmin, xmax.

    Args:
        opt: Option mapping. Reads "checkpoint_path" here and is forwarded
            to ``TEM`` and ``VideoDataSet``.
    """
    import os

    output_dir = "./output/TEM_results/"

    model = TEM(opt)
    checkpoint = torch.load(opt["checkpoint_path"]+"/tem_best.pth.tar")
    # Drop the first dotted component of every key (presumably the
    # "module." prefix added by DataParallel at training time -- confirm).
    base_dict = {'.'.join(k.split('.')[1:]): v
                 for k, v in checkpoint['state_dict'].items()}
    model.load_state_dict(base_dict)
    model = torch.nn.DataParallel(model, device_ids=[0]).cuda()
    model.eval()

    test_loader = torch.utils.data.DataLoader(
        VideoDataSet(opt, subset="full"),
        batch_size=model.module.batch_size, shuffle=False,
        num_workers=8, pin_memory=True, drop_last=False)

    # to_csv does not create missing directories.
    os.makedirs(output_dir, exist_ok=True)

    columns = ["action", "start", "end", "xmin", "xmax"]
    for index_list, input_data, anchor_xmin, anchor_xmax in test_loader:
        # Inference only: skip building the autograd graph.
        with torch.no_grad():
            TEM_output = model(input_data).detach().cpu().numpy()
        batch_action = TEM_output[:, 0, :]
        batch_start = TEM_output[:, 1, :]
        batch_end = TEM_output[:, 2, :]

        index_list = index_list.numpy()
        # Only element 0 of every anchor entry is used, so one set of
        # anchors is shared by all videos in the batch.
        anchor_xmin = np.array([x.numpy()[0] for x in anchor_xmin])
        anchor_xmax = np.array([x.numpy()[0] for x in anchor_xmax])

        for batch_idx, full_idx in enumerate(index_list):
            video = test_loader.dataset.video_list[full_idx]
            video_result = np.stack((batch_action[batch_idx],
                                     batch_start[batch_idx],
                                     batch_end[batch_idx],
                                     anchor_xmin,
                                     anchor_xmax), axis=1)
            video_df = pd.DataFrame(video_result, columns=columns)
            video_df.to_csv(output_dir + video + ".csv", index=False)
# Example #3
# 0
def BSN_Train_TEM(opt):
    """Continue TEM training from 'checkpoint/tem_best.pth.tar'.

    The model is wrapped in DataParallel on device 0 before the saved
    state dict is loaded. Validation runs after every third epoch.

    Args:
        opt: Option mapping. Reads "tem_training_lr", "tem_weight_decay",
            "tem_step_size", "tem_step_gamma" and "tem_epoch" here, and is
            forwarded to ``TEM``, ``VideoDataSet``, ``train_TEM`` and
            ``test_TEM``.
    """
    writer = SummaryWriter()
    model = torch.nn.DataParallel(TEM(opt), device_ids=[0]).cuda()
    saved = torch.load('checkpoint/tem_best.pth.tar')
    model.load_state_dict(saved['state_dict'])
    optimizer = optim.Adam(model.parameters(),
                           lr=opt["tem_training_lr"],
                           weight_decay=opt["tem_weight_decay"])

    def make_loader(subset, shuffle):
        # Both loaders share everything except the subset and shuffling.
        return torch.utils.data.DataLoader(
            VideoDataSet(opt, subset=subset),
            batch_size=model.module.batch_size,
            shuffle=shuffle,
            num_workers=4,
            pin_memory=True,
            drop_last=True)

    train_loader = make_loader("train", True)
    test_loader = make_loader("validation", False)

    scheduler = torch.optim.lr_scheduler.StepLR(
        optimizer,
        step_size=opt["tem_step_size"],
        gamma=opt["tem_step_gamma"])

    num_epochs = opt["tem_epoch"]
    for epoch in range(num_epochs):
        train_TEM(train_loader, model, optimizer, epoch, writer, opt)
        scheduler.step()
        # Epochs are 0-based, so this fires after the 3rd, 6th, ... epoch.
        if epoch % 3 == 2:
            test_TEM(test_loader, model, epoch, writer, opt)
    writer.close()
# Example #4
# 0
def BSN_inference_TEM(opt):
    """Run the best TEM checkpoint for ``opt["arch"]`` over the "full" subset.

    One CSV per video (columns action, start, end, xmin, xmax) is written
    to ``./output/<arch><fix_scale>_TEM_results/``.

    Args:
        opt: Option mapping. Reads "checkpoint_path", "arch" and
            "fix_scale" here and is forwarded to ``TEM`` and
            ``VideoDataSet``.
    """
    import os

    output_dir = "./output/"+opt["arch"]+opt["fix_scale"]+"_TEM_results/"

    model = TEM(opt)
    checkpoint = torch.load(opt["checkpoint_path"]+"/"+opt["arch"]+"_tem_best.pth.tar")
    # Drop the first dotted component of every key (presumably the
    # "module." prefix added by DataParallel at training time -- confirm).
    base_dict = {'.'.join(k.split('.')[1:]): v
                 for k, v in checkpoint['state_dict'].items()}
    model.load_state_dict(base_dict)
    model = torch.nn.DataParallel(model, device_ids=GPU_IDs).cuda()
    model.eval()

    test_loader = torch.utils.data.DataLoader(
        VideoDataSet(opt, subset="full"),
        batch_size=model.module.batch_size, shuffle=False,
        num_workers=8, pin_memory=True, drop_last=False)

    # to_csv does not create missing directories.
    os.makedirs(output_dir, exist_ok=True)

    columns = ["action", "start", "end", "xmin", "xmax"]
    count = 0
    for index_list, input_data, anchor_xmin, anchor_xmax in test_loader:
        # Videos of different length: when the anchors delivered by the
        # loader do not match the temporal length of the input, rebuild
        # evenly spaced anchors over [0, 1] for the actual length.
        if (opt['fix_scale'] == 'nonrescale'
                and len(anchor_xmin) != input_data.shape[2]):
            temporal_scale = input_data.shape[2]
            temporal_gap = 1. / temporal_scale
            # Kept as one-element tensors so the conversion below (and the
            # resulting dtype) is the same as for loader-provided anchors.
            anchor_xmin = [torch.tensor([temporal_gap * i])
                           for i in range(temporal_scale)]
            anchor_xmax = [torch.tensor([temporal_gap * i])
                           for i in range(1, temporal_scale + 1)]

        # Inference only: skip building the autograd graph.
        with torch.no_grad():
            TEM_output = model(input_data).detach().cpu().numpy()
        batch_action = TEM_output[:, 0, :]
        batch_start = TEM_output[:, 1, :]
        batch_end = TEM_output[:, 2, :]

        index_list = index_list.numpy()
        anchor_xmin = np.array([x.numpy()[0] for x in anchor_xmin])
        anchor_xmax = np.array([x.numpy()[0] for x in anchor_xmax])

        for batch_idx, full_idx in enumerate(index_list):
            video = test_loader.dataset.video_list[full_idx]
            video_result = np.stack((batch_action[batch_idx],
                                     batch_start[batch_idx],
                                     batch_end[batch_idx],
                                     anchor_xmin,
                                     anchor_xmax), axis=1)
            video_df = pd.DataFrame(video_result, columns=columns)
            video_df.to_csv(output_dir + video + ".csv", index=False)
            count += 1
        # Checked once per batch, so progress is only reported when the
        # running total lands exactly on a multiple of 100.
        if count % 100 == 0:
            print('finish', count)
            sys.stdout.flush()
# Example #5
# 0
def BSN_inference_TEM(opt):
    '''
    Inference of TEM.

    step - 1. load the best model from opt["checkpoint_path"]
    step - 2. run it over every video of the "full" subset and write one
              CSV per video to ./output/TEM_results/ holding the three
              output curves (action, start, end) plus the xmin/xmax anchors

    opt is the option mapping; it is also forwarded to TEM and VideoDataSet.
    '''
    import os

    output_dir = './output/TEM_results/'

    # step - 1
    model = TEM(opt)
    checkpoint = torch.load(opt["checkpoint_path"]+"/tem_best.pth.tar")
    # Drop the first dotted component of every key (presumably the
    # "module." prefix added by DataParallel at training time -- confirm).
    base_dict = {'.'.join(k.split('.')[1:]): v
                 for k, v in checkpoint['state_dict'].items()}
    model.load_state_dict(base_dict)
    model = torch.nn.DataParallel(model, device_ids=[0]).cuda()

    model.eval()

    # step - 2
    # subset='full' so that results are generated for all videos
    test_loader = torch.utils.data.DataLoader(
        VideoDataSet(opt, subset="full"),
        batch_size=model.module.batch_size, shuffle=False,
        num_workers=8, pin_memory=True, drop_last=False)

    # to_csv does not create missing directories.
    os.makedirs(output_dir, exist_ok=True)

    columns = ['action', 'start', 'end', 'xmin', 'xmax']

    for index_list, input_data, anchor_xmin, anchor_xmax in test_loader:

        # Inference only: skip building the autograd graph.
        with torch.no_grad():
            TEM_output = model(input_data).detach().cpu().numpy()
        batch_action = TEM_output[:, 0, :]
        batch_start = TEM_output[:, 1, :]
        batch_end = TEM_output[:, 2, :]

        index_list = index_list.numpy()
        # Only element 0 of every anchor entry is used, so one set of
        # anchors is shared by all videos in the batch.
        anchor_xmin = np.array([x.numpy()[0] for x in anchor_xmin])
        anchor_xmax = np.array([x.numpy()[0] for x in anchor_xmax])

        for batch_idx, full_idx in enumerate(index_list):
            video_name = test_loader.dataset.video_list[full_idx]
            video_result = np.stack((batch_action[batch_idx],
                                     batch_start[batch_idx],
                                     batch_end[batch_idx],
                                     anchor_xmin,
                                     anchor_xmax), axis=1)
            video_df = pd.DataFrame(video_result, columns=columns)
            video_df.to_csv(output_dir + video_name + '.csv', index=False)
# Example #6
# 0
# NOTE(review): this looks like a truncated leftover. The `def` that follows
# immediately rebinds the name BSN_inference_TEM, so this two-line version is
# replaced as soon as the module finishes loading.
def BSN_inference_TEM(opt):
    # Builds a TEM from the options; the instance is not returned or used.
    model = TEM(opt)
def _build_tem_inference_dataset(opt):
    """Return the dataset selected by ``opt['dataset']`` for TEM inference."""
    name = opt['dataset']
    if name == 'gymnastics':
        img_loading_func = get_img_loader(opt)
        return GymnasticsImages(opt,
                                subset=opt['tem_results_subset'].title(),
                                img_loading_func=img_loading_func,
                                image_dir=opt['gym_image_dir'],
                                video_info_path=os.path.join(
                                    opt['video_info'],
                                    'Full_Annotation.csv'))
    if name == 'gymnasticsfeatures':
        # feature_dirs is a comma-separated list of directories, e.g.
        # ".../csv/rgb,.../csv/flow".
        feature_dirs = opt['feature_dirs'].split(',')
        return GymnasticsFeatures(opt,
                                  subset=opt['tem_results_subset'].title(),
                                  feature_dirs=feature_dirs,
                                  video_info_path=os.path.join(
                                      opt['video_info'],
                                      'Full_Annotation.csv'))
    if name == 'thumosfeatures':
        feature_dirs = opt['feature_dirs'].split(',')
        return ThumosFeatures(opt,
                              subset=opt['tem_results_subset'].title(),
                              feature_dirs=feature_dirs,
                              video_info_path=os.path.join(
                                  opt['video_info'], 'Full_Annotation.csv'))
    if name == 'thumosimages':
        img_loading_func = get_img_loader(opt)
        return ThumosImages(
            opt,
            subset=opt['tem_results_subset'].title(),
            img_loading_func=img_loading_func,
            image_dir='/checkpoint/cinjon/thumos/rawframes.TH14_%s_tal.30' %
            opt['tem_results_subset'],
            video_info_path=os.path.join(opt['video_info'],
                                         'Full_Annotation.csv'))
    if name == 'activitynet':
        representation_module = opt['representation_module']
        test_transforms = get_video_transforms(representation_module, False)
        return VideoDataset(opt,
                            test_transforms,
                            subset='full',
                            fraction=1.0)
    # Previously an unknown name only surfaced later as an unbound variable.
    raise ValueError('Unknown dataset: %r' % (name,))


def _write_tem_video_csv(output_dir, video, action_sum, start_sum, end_sum,
                         frame_hits, columns, log_shape=False):
    """Average one video's per-frame accumulators and write ``<video>.csv``.

    Each ``*_sum`` maps a frame to the summed score and ``frame_hits`` maps
    a frame to the number of contributions; rows are written in ascending
    frame order with the frame as the last column.
    """
    frames = sorted(frame_hits.keys())
    column_action = [action_sum[k] * 1. / frame_hits[k] for k in frames]
    column_start = [start_sum[k] * 1. / frame_hits[k] for k in frames]
    column_end = [end_sum[k] * 1. / frame_hits[k] for k in frames]
    video_result = np.stack([column_action, column_start, column_end],
                            axis=1)
    if log_shape:
        print(video_result.shape, flush=True)
    video_result = np.concatenate(
        [video_result, np.reshape(frames, [-1, 1])], axis=1)
    video_df = pd.DataFrame(video_result, columns=columns)
    path = os.path.join(output_dir, '%s.csv' % video)
    video_df.to_csv(path, index=False)


def BSN_inference_TEM(opt):
    """Run a TEM checkpoint over a dataset and write per-video score CSVs.

    Scores of snippets that hit the same frame of a video are averaged.
    The loader is expected to deliver all items of one video consecutively;
    a video's CSV (columns action, start, end, frames) is written when the
    video name changes, and once more after the last batch.

    Args:
        opt: Option mapping. Reads 'tem_results_dir', 'checkpoint_path',
            'checkpoint_epoch', 'dataset' and 'data_workers' here (plus the
            dataset-specific keys), and is forwarded to ``TEM`` and the
            dataset constructors. When 'checkpoint_epoch' is None the
            'tem_best.pth' checkpoint is used.

    Raises:
        ValueError: if ``opt['dataset']`` is not one of the known names.
    """
    output_dir = os.path.join(opt['tem_results_dir'],
                              opt['checkpoint_path'].split('/')[-1])
    print(sorted(opt.items()), flush=True)

    model = TEM(opt)
    checkpoint_epoch = opt['checkpoint_epoch']
    if checkpoint_epoch is not None:
        checkpoint_path = os.path.join(
            opt['checkpoint_path'], 'tem_checkpoint.%d.pth' % checkpoint_epoch)
        output_dir = os.path.join(output_dir, 'ckpt.%d' % checkpoint_epoch)
    else:
        checkpoint_path = os.path.join(opt['checkpoint_path'], 'tem_best.pth')
        output_dir = os.path.join(output_dir, 'ckpt.best')

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_dir, exist_ok=True)

    print('Checkpoint path is ', checkpoint_path, flush=True)
    checkpoint = torch.load(checkpoint_path)
    # Drop the first dotted component of every key (presumably the
    # "module." prefix added by DataParallel at training time -- confirm).
    base_dict = {
        '.'.join(k.split('.')[1:]): v
        for k, v in checkpoint['state_dict'].items()
    }
    model.load_state_dict(base_dict)
    model = torch.nn.DataParallel(model).cuda()
    model.eval()

    dataset = _build_tem_inference_dataset(opt)

    test_loader = torch.utils.data.DataLoader(
        dataset,
        batch_size=model.module.batch_size,
        shuffle=False,
        num_workers=opt['data_workers'],
        pin_memory=True,
        drop_last=False)

    columns = ["action", "start", "end", "frames"]

    all_vids = defaultdict(int)
    current_video = None
    current_start = defaultdict(float)
    current_end = defaultdict(float)
    current_action = defaultdict(float)
    calc_time_list = defaultdict(int)
    print('About to start enumerating', flush=True)
    for test_idx, (index_list, input_data, video_name,
                   snippets) in enumerate(test_loader):
        if test_idx == 0:
            print('Started enumerating!', flush=True)

        # Inference only: skip building the autograd graph.
        with torch.no_grad():
            TEM_output = model(input_data).detach().cpu().numpy()
        batch_action = TEM_output[:, 0, :]
        batch_start = TEM_output[:, 1, :]
        batch_end = TEM_output[:, 2, :]

        index_list = index_list.numpy()
        for batch_idx, full_idx in enumerate(index_list):
            item_video = video_name[batch_idx]
            all_vids[item_video] += 1
            item_snippets = snippets[batch_idx]

            if not current_video or item_video != current_video:
                if not current_video:
                    print('First video: ', item_video, flush=True)
                else:
                    # The previous video is complete: flush it to disk.
                    print('Next video: ', item_video, full_idx, flush=True)
                    _write_tem_video_csv(output_dir, current_video,
                                         current_action, current_start,
                                         current_end, calc_time_list,
                                         columns)
                current_video = item_video
                current_start = defaultdict(float)
                current_end = defaultdict(float)
                current_action = defaultdict(float)
                calc_time_list = defaultdict(int)

            for snippet_, action_, start_, end_ in zip(item_snippets,
                                                       batch_action[batch_idx],
                                                       batch_start[batch_idx],
                                                       batch_end[batch_idx]):
                frame = snippet_.item()
                calc_time_list[frame] += 1
                current_action[frame] += action_
                current_start[frame] += start_
                current_end[frame] += end_

    # The last video never sees a "next video" transition; flush it here.
    if len(calc_time_list):
        _write_tem_video_csv(output_dir, current_video, current_action,
                             current_start, current_end, calc_time_list,
                             columns, log_shape=True)
    print(len(all_vids))
def _build_tem_train_datasets(opt):
    """Return ``(train_data_set, test_data_set, train_sampler)`` for training.

    The choice is driven by ``opt['dataset']``; ``train_sampler`` is None
    for every dataset except 'gymnastics'.
    """
    name = opt['dataset']
    if name == 'gymnastics':
        img_loading_func = get_img_loader(opt)
        train_data_set = GymnasticsImages(opt,
                                          subset='Train',
                                          img_loading_func=img_loading_func,
                                          image_dir=opt['gym_image_dir'],
                                          video_info_path=os.path.join(
                                              opt['video_info'],
                                              'Train_Annotation.csv'))
        train_sampler = GymnasticsSampler(train_data_set, opt['sampler_mode'])
        test_data_set = GymnasticsImages(opt,
                                         subset="Val",
                                         img_loading_func=img_loading_func,
                                         image_dir=opt['gym_image_dir'],
                                         video_info_path=os.path.join(
                                             opt['video_info'],
                                             'Val_Annotation.csv'))
        return train_data_set, test_data_set, train_sampler
    if name == 'gymnasticsfeatures':
        # feature_dirs is a comma-separated list of directories, e.g.
        # ".../csv/rgb,.../csv/flow".
        feature_dirs = opt['feature_dirs'].split(',')
        train_data_set = GymnasticsFeatures(opt,
                                            subset='Train',
                                            feature_dirs=feature_dirs,
                                            video_info_path=os.path.join(
                                                opt['video_info'],
                                                'Train_Annotation.csv'))
        test_data_set = GymnasticsFeatures(opt,
                                           subset='Val',
                                           feature_dirs=feature_dirs,
                                           video_info_path=os.path.join(
                                               opt['video_info'],
                                               'Val_Annotation.csv'))
        return train_data_set, test_data_set, None
    if name == 'thumosfeatures':
        # The 'Val' subset is used for training and 'Test' for evaluation.
        feature_dirs = opt['feature_dirs'].split(',')
        train_data_set = ThumosFeatures(opt,
                                        subset='Val',
                                        feature_dirs=feature_dirs)
        test_data_set = ThumosFeatures(opt,
                                       subset="Test",
                                       feature_dirs=feature_dirs)
        return train_data_set, test_data_set, None
    if name == 'thumosimages':
        img_loading_func = get_img_loader(opt)
        train_data_set = ThumosImages(
            opt,
            subset='Val',
            img_loading_func=img_loading_func,
            image_dir=
            '/checkpoint/cinjon/thumos/rawframes.TH14_validation_tal.30',
            video_info_path=os.path.join(opt['video_info'],
                                         'Val_Annotation.csv'))
        test_data_set = ThumosImages(
            opt,
            subset='Test',
            img_loading_func=img_loading_func,
            image_dir='/checkpoint/cinjon/thumos/rawframes.TH14_test_tal.30',
            video_info_path=os.path.join(opt['video_info'],
                                         'Test_Annotation.csv'))
        return train_data_set, test_data_set, None
    if name == 'activitynet':
        representation_module = opt['representation_module']
        train_transforms = get_video_transforms(representation_module,
                                                opt['do_augment'])
        test_transforms = get_video_transforms(representation_module, False)
        train_data_set = VideoDataset(opt,
                                      train_transforms,
                                      subset='train',
                                      fraction=0.3)
        # We use val because we don't have annotations for test.
        test_data_set = VideoDataset(opt,
                                     test_transforms,
                                     subset='val',
                                     fraction=0.3)
        return train_data_set, test_data_set, None
    # Previously an unknown name only surfaced later as an unbound variable.
    raise ValueError('Unknown dataset: %r' % (name,))


def BSN_Train_TEM(opt):
    """Train TEM, optionally resuming, and evaluate after every epoch.

    Training state (global step, epoch) comes from ``_maybe_load_checkpoint``
    on ``<checkpoint_path>/<name>``; epochs then run from the returned
    epoch + 1 up to and including ``opt["tem_epoch"]``.

    Args:
        opt: Option mapping. Reads 'do_representation',
            'representation_checkpoint', 'do_random_model', 'no_freeze',
            'tem_training_lr', 'tem_weight_decay', 'checkpoint_path',
            'name', 'dataset', 'data_workers', 'tem_lr_milestones'
            (comma-separated ints), 'tem_step_gamma', 'log_to_comet',
            'local_comet_dir' and 'tem_epoch' here, plus dataset-specific
            keys; it is also forwarded to ``TEM``, the datasets,
            ``train_TEM`` and ``test_TEM``.

    Raises:
        ValueError: if ``opt['dataset']`` is not one of the known names.
    """
    global_step = 0
    epoch = 0
    do_representation = opt['do_representation']

    # Model, optimizer and checkpoint restore are the same in both modes.
    model = TEM(opt)
    optimizer = optim.Adam(model.parameters(),
                           lr=opt["tem_training_lr"],
                           weight_decay=opt["tem_weight_decay"])
    global_step, epoch = _maybe_load_checkpoint(
        model, optimizer, global_step, epoch,
        os.path.join(opt["checkpoint_path"], opt['name']))

    if do_representation:
        if opt['representation_checkpoint']:
            if opt['do_random_model']:
                print('DOING RANDOM MDOEL!!!')
            else:
                print('DOING Pretrianed modelll!!!')
                partial_load(opt['representation_checkpoint'], model)
        if not opt['no_freeze']:
            # Freeze the representation sub-model.
            for param in model.representation_model.parameters():
                param.requires_grad = False
        print(len(list(model.representation_model.parameters())))

    model = torch.nn.DataParallel(model).cuda()

    print('    Total params: %.2fM' %
          (sum(p.numel() for p in model.parameters()) / 1000000.0))

    train_data_set, test_data_set, train_sampler = \
        _build_tem_train_datasets(opt)

    print('train_loader / val_loader sizes: ', len(train_data_set),
          len(test_data_set))
    train_loader = torch.utils.data.DataLoader(
        train_data_set,
        batch_size=model.module.batch_size,
        # DataLoader rejects shuffle=True together with a sampler.
        shuffle=not train_sampler,
        sampler=train_sampler,
        num_workers=opt['data_workers'],
        pin_memory=True,
        drop_last=False)

    test_loader = torch.utils.data.DataLoader(
        test_data_set,
        batch_size=model.module.batch_size,
        shuffle=False,
        num_workers=opt['data_workers'],
        pin_memory=True,
        drop_last=False)

    milestones = [int(k) for k in opt['tem_lr_milestones'].split(',')]
    # NOTE(review): the scheduler always starts from its initial state, even
    # when `epoch` was restored from a checkpoint -- confirm that restarting
    # the milestone schedule on resume is intended.
    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=milestones, gamma=opt['tem_step_gamma'])

    # SECURITY(review): the API key is hard-coded in source. It should be
    # rotated and supplied through configuration or the environment instead.
    comet_kwargs = dict(api_key="hIXq6lDzWzz24zgKv7RYz6blo",
                        project_name="bsn",
                        workspace="cinjon",
                        auto_metric_logging=True,
                        auto_output_logging=None,
                        auto_param_logging=False)
    if opt['log_to_comet']:
        comet_exp = CometExperiment(**comet_kwargs)
    elif opt['local_comet_dir']:
        comet_exp = OfflineExperiment(
            offline_directory=opt['local_comet_dir'], **comet_kwargs)
    else:
        comet_exp = None

    if comet_exp:
        comet_exp.log_parameters(opt)
        comet_exp.set_name(opt['name'])

    for epoch in range(epoch + 1, opt["tem_epoch"] + 1):
        global_step = train_TEM(train_loader, model, optimizer, epoch,
                                global_step, comet_exp, opt)
        test_TEM(test_loader, model, optimizer, epoch, global_step, comet_exp,
                 opt)
        if opt['dataset'] == 'activitynet':
            # Re-run the datasets' 0.3 subsetting for the next epoch.
            test_loader.dataset._subset_dataset(.3)
            train_loader.dataset._subset_dataset(.3)
        scheduler.step()