def BSN_Train_TEM(opt):
    writer = SummaryWriter()
    model = TEM(opt)
    model = torch.nn.DataParallel(model, device_ids=GPU_IDs).cuda()
    optimizer = optim.Adam(model.parameters(),
                           lr=opt["tem_training_lr"],
                           weight_decay=opt["tem_weight_decay"])

    train_loader = torch.utils.data.DataLoader(
        VideoDataSet(opt, subset="train"),
        batch_size=model.module.batch_size,
        shuffle=True,
        num_workers=8,
        pin_memory=True,
        drop_last=True)

    test_loader = torch.utils.data.DataLoader(
        VideoDataSet(opt, subset="validation"),
        batch_size=model.module.batch_size,
        shuffle=False,
        num_workers=8,
        pin_memory=True,
        drop_last=True)

    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=opt["tem_step_size"],
                                                gamma=opt["tem_step_gamma"])

    for epoch in range(opt["tem_epoch"]):
        # Step the LR schedule at the start of each epoch, then run one
        # training pass and one validation pass.
        scheduler.step()
        train_TEM(train_loader, model, optimizer, epoch, writer, opt)
        test_TEM(test_loader, model, epoch, writer, opt)
    writer.close()
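# A minimal sketch of the config dict that BSN_Train_TEM above reads. The key
# names are taken from the function body; the values are illustrative
# assumptions, not the project's original defaults.
EXAMPLE_TEM_OPT = {
    "tem_training_lr": 1e-3,    # Adam learning rate
    "tem_weight_decay": 1e-4,   # Adam weight decay
    "tem_step_size": 10,        # StepLR: decay the LR every 10 epochs
    "tem_step_gamma": 0.1,      # StepLR: multiply the LR by 0.1 at each step
    "tem_epoch": 20,            # total number of training epochs
    "checkpoint_path": "./checkpoint",
}
# Usage (assuming the surrounding module defines TEM, VideoDataSet, GPU_IDs, etc.):
#     BSN_Train_TEM(EXAMPLE_TEM_OPT)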
def BSN_inference_TEM(opt): model = TEM(opt) checkpoint = torch.load(opt["checkpoint_path"]+"/tem_best.pth.tar") base_dict = {'.'.join(k.split('.')[1:]): v for k,v in list(checkpoint['state_dict'].items())} model.load_state_dict(base_dict) model = torch.nn.DataParallel(model, device_ids=[0]).cuda() model.eval() test_loader = torch.utils.data.DataLoader(VideoDataSet(opt,subset="full"), batch_size=model.module.batch_size, shuffle=False, num_workers=8, pin_memory=True,drop_last=False) columns=["action","start","end","xmin","xmax"] for index_list,input_data,anchor_xmin,anchor_xmax in test_loader: TEM_output = model(input_data).detach().cpu().numpy() batch_action = TEM_output[:,0,:] batch_start = TEM_output[:,1,:] batch_end = TEM_output[:,2,:] index_list = index_list.numpy() anchor_xmin = np.array([x.numpy()[0] for x in anchor_xmin]) anchor_xmax = np.array([x.numpy()[0] for x in anchor_xmax]) for batch_idx,full_idx in enumerate(index_list): video = test_loader.dataset.video_list[full_idx] video_action = batch_action[batch_idx] video_start = batch_start[batch_idx] video_end = batch_end[batch_idx] video_result = np.stack((video_action,video_start,video_end,anchor_xmin,anchor_xmax),axis=1) video_df = pd.DataFrame(video_result,columns=columns) video_df.to_csv("./output/TEM_results/"+video+".csv",index=False)
def BSN_inference_TEM(opt): model = TEM(opt) checkpoint = torch.load(opt["checkpoint_path"]+"/"+opt["arch"]+"_tem_best.pth.tar") base_dict = {'.'.join(k.split('.')[1:]): v for k,v in list(checkpoint['state_dict'].items())} model.load_state_dict(base_dict) model = torch.nn.DataParallel(model, device_ids=GPU_IDs).cuda() model.eval() test_loader = torch.utils.data.DataLoader(VideoDataSet(opt,subset="full"), batch_size=model.module.batch_size, shuffle=False, num_workers=8, pin_memory=True,drop_last=False) # test_loader = torch.utils.data.DataLoader(VideoDataSet(opt,subset="trainval"), # batch_size=model.module.batch_size, shuffle=False, # num_workers=8, pin_memory=True,drop_last=False) columns=["action","start","end","xmin","xmax"] count = 0 for index_list,input_data,anchor_xmin,anchor_xmax in test_loader: #for video with different length # if opt['fix_scale'] is False: if opt['fix_scale'] == 'nonrescale': if len(anchor_xmin) != input_data.shape[2]: temporal_scale = input_data.shape[2] temporal_gap = 1. / temporal_scale anchor_xmin=[temporal_gap*i for i in range(temporal_scale)] anchor_xmin = [torch.tensor([x]) for x in anchor_xmin] anchor_xmax=[temporal_gap*i for i in range(1,temporal_scale+1)] anchor_xmax = [torch.tensor([x]) for x in anchor_xmax] ############################################################# TEM_output = model(input_data).detach().cpu().numpy() batch_action = TEM_output[:,0,:] batch_start = TEM_output[:,1,:] batch_end = TEM_output[:,2,:] index_list = index_list.numpy() anchor_xmin = np.array([x.numpy()[0] for x in anchor_xmin]) anchor_xmax = np.array([x.numpy()[0] for x in anchor_xmax]) for batch_idx,full_idx in enumerate(index_list): video = test_loader.dataset.video_list[full_idx] video_action = batch_action[batch_idx] video_start = batch_start[batch_idx] video_end = batch_end[batch_idx] video_result = np.stack((video_action,video_start,video_end,anchor_xmin,anchor_xmax),axis=1) video_df = pd.DataFrame(video_result,columns=columns) video_df.to_csv("./output/"+opt["arch"]+opt["fix_scale"]+"_TEM_results/"+video+".csv",index=False) count += 1 if count % 100 == 0: print('finish', count) sys.stdout.flush()
def BSN_inference_TEM(opt):
    '''
    Inference of TEM.
    step 1 - load the best model
    step 2 - the output of TEM is three pdf curves for each scaled video
    '''
    # step 1
    model = TEM(opt)
    checkpoint = torch.load(opt["checkpoint_path"] + "/tem_best.pth.tar")
    base_dict = {'.'.join(k.split('.')[1:]): v
                 for k, v in list(checkpoint['state_dict'].items())}
    model.load_state_dict(base_dict)
    model = torch.nn.DataParallel(model, device_ids=[0]).cuda()
    model.eval()

    # step 2
    # set subset = 'full' to generate the pdf of all videos
    test_loader = torch.utils.data.DataLoader(
        VideoDataSet(opt, subset="full"),
        batch_size=model.module.batch_size,
        shuffle=False,
        num_workers=8,
        pin_memory=True,
        drop_last=False)

    columns = ['action', 'start', 'end', 'xmin', 'xmax']
    for index_list, input_data, anchor_xmin, anchor_xmax in test_loader:
        TEM_output = model(input_data).detach().cpu().numpy()
        batch_action = TEM_output[:, 0, :]
        batch_start = TEM_output[:, 1, :]
        batch_end = TEM_output[:, 2, :]

        index_list = index_list.numpy()
        anchor_xmin = np.array([x.numpy()[0] for x in anchor_xmin])
        anchor_xmax = np.array([x.numpy()[0] for x in anchor_xmax])

        for batch_idx, full_idx in enumerate(index_list):
            video_name = test_loader.dataset.video_list[full_idx]
            video_action = batch_action[batch_idx]
            video_start = batch_start[batch_idx]
            video_end = batch_end[batch_idx]
            video_result = np.stack(
                (video_action, video_start, video_end, anchor_xmin, anchor_xmax),
                axis=1)
            video_df = pd.DataFrame(video_result, columns=columns)
            video_df.to_csv('./output/TEM_results/' + video_name + '.csv', index=False)
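# The inference variants above write their CSVs into "./output/TEM_results/"
# but do not create that directory themselves (pandas.DataFrame.to_csv will not
# create missing directories). A minimal driver might look like the sketch
# below; it is an assumed wrapper, not part of the original pipeline.
import os

def example_run_tem_inference(opt):
    # Make sure the output directory exists before the per-video CSV writes.
    os.makedirs("./output/TEM_results", exist_ok=True)
    BSN_inference_TEM(opt)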
def BSN_Train_TEM(opt):
    writer = SummaryWriter()
    model = TEM(opt)
    model = torch.nn.DataParallel(model, device_ids=[0]).cuda()
    # Warm-start from a previously trained TEM checkpoint.
    state_dict = torch.load('checkpoint/tem_best.pth.tar')['state_dict']
    model.load_state_dict(state_dict)

    optimizer = optim.Adam(model.parameters(),
                           lr=opt["tem_training_lr"],
                           weight_decay=opt["tem_weight_decay"])

    train_loader = torch.utils.data.DataLoader(
        VideoDataSet(opt, subset="train"),
        batch_size=model.module.batch_size,
        shuffle=True,
        num_workers=4,
        pin_memory=True,
        drop_last=True)

    test_loader = torch.utils.data.DataLoader(
        VideoDataSet(opt, subset="validation"),
        batch_size=model.module.batch_size,
        shuffle=False,
        num_workers=4,
        pin_memory=True,
        drop_last=True)

    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=opt["tem_step_size"],
                                                gamma=opt["tem_step_gamma"])

    for epoch in range(opt["tem_epoch"]):
        train_TEM(train_loader, model, optimizer, epoch, writer, opt)
        scheduler.step()
        # Evaluate on the validation subset every third epoch.
        if (epoch + 1) % 3 == 0:
            test_TEM(test_loader, model, epoch, writer, opt)
    writer.close()
def BSN_inference_TEM(opt):
    model = TEM(opt)
def BSN_inference_TEM(opt):
    output_dir = os.path.join(opt['tem_results_dir'],
                              opt['checkpoint_path'].split('/')[-1])
    print(sorted(opt.items()), flush=True)

    model = TEM(opt)

    checkpoint_epoch = opt['checkpoint_epoch']
    if checkpoint_epoch is not None:
        checkpoint_path = os.path.join(opt['checkpoint_path'],
                                       'tem_checkpoint.%d.pth' % checkpoint_epoch)
        output_dir = os.path.join(output_dir, 'ckpt.%d' % checkpoint_epoch)
    else:
        checkpoint_path = os.path.join(opt['checkpoint_path'], 'tem_best.pth')
        output_dir = os.path.join(output_dir, 'ckpt.best')

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    print('Checkpoint path is ', checkpoint_path, flush=True)
    checkpoint = torch.load(checkpoint_path)
    # Strip the "module." prefix left by DataParallel before loading.
    base_dict = {
        '.'.join(k.split('.')[1:]): v
        for k, v in list(checkpoint['state_dict'].items())
    }
    model.load_state_dict(base_dict)
    model = torch.nn.DataParallel(model).cuda()
    model.eval()

    if opt['dataset'] == 'gymnastics':
        img_loading_func = get_img_loader(opt)
        dataset = GymnasticsImages(opt,
                                   subset=opt['tem_results_subset'].title(),
                                   img_loading_func=img_loading_func,
                                   image_dir=opt['gym_image_dir'],
                                   video_info_path=os.path.join(
                                       opt['video_info'], 'Full_Annotation.csv'))
    elif opt['dataset'] == 'gymnasticsfeatures':
        # feature_dirs should roughly look like:
        # /checkpoint/cinjon/spaceofmotion/sep052019/tsn.1024.426x240.12.no-oversample/csv/rgb,/checkpoint/cinjon/spaceofmotion/sep052019/tsn.1024.426x240.12.no-oversample/csv/flow
        feature_dirs = opt['feature_dirs'].split(',')
        dataset = GymnasticsFeatures(opt,
                                     subset=opt['tem_results_subset'].title(),
                                     feature_dirs=feature_dirs,
                                     video_info_path=os.path.join(
                                         opt['video_info'], 'Full_Annotation.csv'))
    elif opt['dataset'] == 'thumosfeatures':
        feature_dirs = opt['feature_dirs'].split(',')
        dataset = ThumosFeatures(opt,
                                 subset=opt['tem_results_subset'].title(),
                                 feature_dirs=feature_dirs,
                                 video_info_path=os.path.join(
                                     opt['video_info'], 'Full_Annotation.csv'))
    elif opt['dataset'] == 'thumosimages':
        img_loading_func = get_img_loader(opt)
        dataset = ThumosImages(
            opt,
            subset=opt['tem_results_subset'].title(),
            img_loading_func=img_loading_func,
            image_dir='/checkpoint/cinjon/thumos/rawframes.TH14_%s_tal.30' %
            opt['tem_results_subset'],
            video_info_path=os.path.join(opt['video_info'],
                                         'Full_Annotation.csv'))
    elif opt['dataset'] == 'activitynet':
        representation_module = opt['representation_module']
        test_transforms = get_video_transforms(representation_module, False)
        dataset = VideoDataset(opt, test_transforms, subset='full', fraction=1.0)

    test_loader = torch.utils.data.DataLoader(
        dataset,
        batch_size=model.module.batch_size,
        shuffle=False,
        num_workers=opt['data_workers'],
        pin_memory=True,
        drop_last=False)

    columns = ["action", "start", "end", "frames"]

    all_vids = defaultdict(int)
    current_video = None
    current_start = defaultdict(float)
    current_end = defaultdict(float)
    current_action = defaultdict(float)
    calc_time_list = defaultdict(int)

    num_videoframes = opt['num_videoframes']
    skip_videoframes = opt['skip_videoframes']

    print('About to start enumerating', flush=True)
    for test_idx, (index_list, input_data, video_name,
                   snippets) in enumerate(test_loader):
        if test_idx == 0:
            print('Started enumerating!', flush=True)

        # The data should come back such that consecutive items are from the
        # same video until there is a breakpoint and a new video starts.
        TEM_output = model(input_data).detach().cpu().numpy()
        batch_action = TEM_output[:, 0, :]
        batch_start = TEM_output[:, 1, :]
        batch_end = TEM_output[:, 2, :]
        index_list = index_list.numpy()

        for batch_idx, full_idx in enumerate(index_list):
            item_video = video_name[batch_idx]
            all_vids[item_video] += 1
            item_snippets = snippets[batch_idx]

            if not current_video:
                print('First video: ', item_video, flush=True)
                current_video = item_video
                current_start = defaultdict(float)
                current_end = defaultdict(float)
                current_action = defaultdict(float)
                calc_time_list = defaultdict(int)
            elif item_video != current_video:
                # A new video has started: average the accumulated per-frame
                # scores of the previous video and write them to disk.
                print('Next video: ', item_video, full_idx, flush=True)
                column_frames = sorted(calc_time_list.keys())
                column_action = [
                    current_action[k] * 1. / calc_time_list[k]
                    for k in column_frames
                ]
                column_start = [
                    current_start[k] * 1. / calc_time_list[k]
                    for k in column_frames
                ]
                column_end = [
                    current_end[k] * 1. / calc_time_list[k]
                    for k in column_frames
                ]
                video_result = np.stack(
                    [column_action, column_start, column_end], axis=1)
                column_frames = np.reshape(column_frames, [-1, 1])
                video_result = np.concatenate([video_result, column_frames],
                                              axis=1)
                video_df = pd.DataFrame(video_result, columns=columns)
                path = os.path.join(output_dir, '%s.csv' % current_video)
                video_df.to_csv(path, index=False)

                current_video = item_video
                current_start = defaultdict(float)
                current_end = defaultdict(float)
                current_action = defaultdict(float)
                calc_time_list = defaultdict(int)

            # Accumulate this snippet's predictions, frame by frame.
            for snippet_, action_, start_, end_ in zip(item_snippets,
                                                       batch_action[batch_idx],
                                                       batch_start[batch_idx],
                                                       batch_end[batch_idx]):
                frame = snippet_.item()
                calc_time_list[frame] += 1
                current_action[frame] += action_
                current_start[frame] += start_
                current_end[frame] += end_

    # Flush the final video after the loop ends.
    if len(calc_time_list):
        column_frames = sorted(calc_time_list.keys())
        column_action = [
            current_action[k] * 1. / calc_time_list[k] for k in column_frames
        ]
        column_start = [
            current_start[k] * 1. / calc_time_list[k] for k in column_frames
        ]
        column_end = [
            current_end[k] * 1. / calc_time_list[k] for k in column_frames
        ]
        video_result = np.stack([column_action, column_start, column_end],
                                axis=1)
        print(video_result.shape, flush=True)
        video_result = np.concatenate(
            [video_result, np.reshape(column_frames, [-1, 1])], axis=1)
        video_df = pd.DataFrame(video_result, columns=columns)
        path = os.path.join(output_dir, '%s.csv' % current_video)
        video_df.to_csv(path, index=False)

    print(len(all_vids))
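# Toy sketch of the accumulate-then-average pattern used above: overlapping
# snippets can score the same frame several times, so the scores are summed per
# frame and divided by the visit count before the CSV is written. Hypothetical
# helper and data; not part of the original pipeline.
from collections import defaultdict

def example_average_overlapping_scores(snippet_scores):
    # snippet_scores: list of (frame_index, score) pairs, possibly containing
    # repeated frame indices produced by overlapping snippets.
    sums = defaultdict(float)
    counts = defaultdict(int)
    for frame, score in snippet_scores:
        sums[frame] += score
        counts[frame] += 1
    return {frame: sums[frame] / counts[frame] for frame in sorted(counts)}

# example_average_overlapping_scores([(0, 0.2), (0, 0.4), (1, 0.9)])
# -> {0: 0.3, 1: 0.9}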
def BSN_Train_TEM(opt):
    global_step = 0
    epoch = 0
    if opt['do_representation']:
        model = TEM(opt)
        optimizer = optim.Adam(model.parameters(),
                               lr=opt["tem_training_lr"],
                               weight_decay=opt["tem_weight_decay"])
        global_step, epoch = _maybe_load_checkpoint(
            model, optimizer, global_step, epoch,
            os.path.join(opt["checkpoint_path"], opt['name']))
        if opt['representation_checkpoint']:
            # print(model.representation_model.backbone.inception_5b_3x3.weight[0][0])
            if opt['do_random_model']:
                print('DOING RANDOM MODEL!!!')
            else:
                print('DOING PRETRAINED MODEL!!!')
                partial_load(opt['representation_checkpoint'], model)
            # print(model.representation_model.backbone.inception_5b_3x3.weight[0][0])
        if not opt['no_freeze']:
            # Freeze the representation backbone so only the TEM head trains.
            for param in model.representation_model.parameters():
                param.requires_grad = False
        print(len([p for p in model.representation_model.parameters()]))
    else:
        model = TEM(opt)
        optimizer = optim.Adam(model.parameters(),
                               lr=opt["tem_training_lr"],
                               weight_decay=opt["tem_weight_decay"])
        global_step, epoch = _maybe_load_checkpoint(
            model, optimizer, global_step, epoch,
            os.path.join(opt["checkpoint_path"], opt['name']))

    model = torch.nn.DataParallel(model).cuda()
    # summary(model, (2, 3, 224, 224))
    print(' Total params: %.2fM' %
          (sum(p.numel() for p in model.parameters()) / 1000000.0))

    if opt['dataset'] == 'gymnastics':
        # default image_dir is '/checkpoint/cinjon/spaceofmotion/sep052019/rawframes.426x240.12'
        img_loading_func = get_img_loader(opt)
        train_data_set = GymnasticsImages(opt,
                                          subset='Train',
                                          img_loading_func=img_loading_func,
                                          image_dir=opt['gym_image_dir'],
                                          video_info_path=os.path.join(
                                              opt['video_info'],
                                              'Train_Annotation.csv'))
        train_sampler = GymnasticsSampler(train_data_set, opt['sampler_mode'])
        test_data_set = GymnasticsImages(opt,
                                         subset="Val",
                                         img_loading_func=img_loading_func,
                                         image_dir=opt['gym_image_dir'],
                                         video_info_path=os.path.join(
                                             opt['video_info'],
                                             'Val_Annotation.csv'))
    elif opt['dataset'] == 'gymnasticsfeatures':
        # feature_dirs should roughly look like:
        # /checkpoint/cinjon/spaceofmotion/sep052019/tsn.1024.426x240.12.no-oversample/csv/rgb,/checkpoint/cinjon/spaceofmotion/sep052019/tsn.1024.426x240.12.no-oversample/csv/flow
        feature_dirs = opt['feature_dirs'].split(',')
        train_data_set = GymnasticsFeatures(opt,
                                            subset='Train',
                                            feature_dirs=feature_dirs,
                                            video_info_path=os.path.join(
                                                opt['video_info'],
                                                'Train_Annotation.csv'))
        test_data_set = GymnasticsFeatures(opt,
                                           subset='Val',
                                           feature_dirs=feature_dirs,
                                           video_info_path=os.path.join(
                                               opt['video_info'],
                                               'Val_Annotation.csv'))
        train_sampler = None
    elif opt['dataset'] == 'thumosfeatures':
        feature_dirs = opt['feature_dirs'].split(',')
        train_data_set = ThumosFeatures(opt,
                                        subset='Val',
                                        feature_dirs=feature_dirs)
        test_data_set = ThumosFeatures(opt,
                                       subset="Test",
                                       feature_dirs=feature_dirs)
        train_sampler = None
    elif opt['dataset'] == 'thumosimages':
        img_loading_func = get_img_loader(opt)
        train_data_set = ThumosImages(
            opt,
            subset='Val',
            img_loading_func=img_loading_func,
            image_dir='/checkpoint/cinjon/thumos/rawframes.TH14_validation_tal.30',
            video_info_path=os.path.join(opt['video_info'],
                                         'Val_Annotation.csv'))
        test_data_set = ThumosImages(
            opt,
            subset='Test',
            img_loading_func=img_loading_func,
            image_dir='/checkpoint/cinjon/thumos/rawframes.TH14_test_tal.30',
            video_info_path=os.path.join(opt['video_info'],
                                         'Test_Annotation.csv'))
        train_sampler = None
    elif opt['dataset'] == 'activitynet':
        train_sampler = None
        representation_module = opt['representation_module']
        train_transforms = get_video_transforms(representation_module,
                                                opt['do_augment'])
        test_transforms = get_video_transforms(representation_module, False)
        train_data_set = VideoDataset(opt,
                                      train_transforms,
                                      subset='train',
                                      fraction=0.3)
        # We use val because we don't have annotations for test.
        test_data_set = VideoDataset(opt,
                                     test_transforms,
                                     subset='val',
                                     fraction=0.3)

    print('train_loader / val_loader sizes: ', len(train_data_set),
          len(test_data_set))
    train_loader = torch.utils.data.DataLoader(
        train_data_set,
        batch_size=model.module.batch_size,
        shuffle=False if train_sampler else True,
        sampler=train_sampler,
        num_workers=opt['data_workers'],
        pin_memory=True,
        drop_last=False)

    test_loader = torch.utils.data.DataLoader(
        test_data_set,
        batch_size=model.module.batch_size,
        shuffle=False,
        num_workers=opt['data_workers'],
        pin_memory=True,
        drop_last=False)
    # test_loader = None

    milestones = [int(k) for k in opt['tem_lr_milestones'].split(',')]
    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=milestones, gamma=opt['tem_step_gamma'])

    if opt['log_to_comet']:
        comet_exp = CometExperiment(api_key="hIXq6lDzWzz24zgKv7RYz6blo",
                                    project_name="bsn",
                                    workspace="cinjon",
                                    auto_metric_logging=True,
                                    auto_output_logging=None,
                                    auto_param_logging=False)
    elif opt['local_comet_dir']:
        comet_exp = OfflineExperiment(api_key="hIXq6lDzWzz24zgKv7RYz6blo",
                                      project_name="bsn",
                                      workspace="cinjon",
                                      auto_metric_logging=True,
                                      auto_output_logging=None,
                                      auto_param_logging=False,
                                      offline_directory=opt['local_comet_dir'])
    else:
        comet_exp = None
    if comet_exp:
        comet_exp.log_parameters(opt)
        comet_exp.set_name(opt['name'])

    # test_TEM(test_loader, model, optimizer, 0, 0, comet_exp, opt)
    for epoch in range(epoch + 1, opt["tem_epoch"] + 1):
        global_step = train_TEM(train_loader, model, optimizer, epoch,
                                global_step, comet_exp, opt)
        test_TEM(test_loader, model, optimizer, epoch, global_step, comet_exp,
                 opt)
        if opt['dataset'] == 'activitynet':
            # Resample a fresh 30% subset of ActivityNet each epoch.
            test_loader.dataset._subset_dataset(.3)
            train_loader.dataset._subset_dataset(.3)
        scheduler.step()