def main():
    """Generate temporal action proposals for the THUMOS14 *test* split.

    Loads a trained PointerNetwork checkpoint (index ``args.fileid``), runs it
    over BNInception features of every test video at several temporal sample
    rates, merges the per-rate proposals after near-duplicate NMS, and writes
    the result as a space-separated CSV compatible with the proposal-evaluation
    tooling.

    Relies on module-level objects: ``parser``, ``user_home_directory``,
    ``cuda_model``, ``pp``, ``PointerNetwork``, ``SingleVideoLoader``,
    ``helper``, ``PropUtils``, ``pkl_frame2dataframe``.
    """
    global args
    args = (parser.parse_args())
    ckpt_idx = args.fileid
    # Output CSV path, relative to the user's home directory (joined below).
    proposal_save_file = 'Dev/NetModules/ActionLocalizationDevs/PropEval/baselines_results/inception-s4-EMD-gru-aug-{:04d}_thumos14_test.csv'.format(
        ckpt_idx)
    feature_directory = os.path.join(user_home_directory, 'datasets/THUMOS14/features/BNInception')
    # NOTE(review): the second argument is an absolute path, so os.path.join
    # discards `user_home_directory` and returns the absolute path unchanged.
    # Works on the original author's machine only — confirm before reuse.
    ground_truth_file = os.path.join(
        user_home_directory,
        '/home/zwei/Dev/NetModules/ActionLocalizationDevs/action_det_prep/thumos14_tag_test_proposal_list.csv'
    )
    ground_truth = pd.read_csv(ground_truth_file, sep=' ')
    # One (video-name, video-frames) pair per video; used to map each video
    # name to its total frame count.
    target_video_frms = ground_truth[['video-name', 'video-frames']].drop_duplicates().values
    frm_nums = {}
    for s_target_videofrms in target_video_frms:
        frm_nums[s_target_videofrms[0]] = s_target_videofrms[1]
    target_file_names = ground_truth['video-name'].unique()
    feature_file_ext = 'npy'
    use_cuda = cuda_model.ifUseCuda(args.gpu_id, args.multiGpu)
    # Pretty print the run args
    pp.pprint(vars(args))
    model = PointerNetwork(input_dim=args.input_dim,
                           embedding_dim=args.embedding_dim,
                           hidden_dim=args.hidden_dim,
                           max_decoding_len=args.net_outputs)
    print("Number of Params\t{:d}".format(
        sum([p.data.nelement() for p in model.parameters()])))
    model = cuda_model.convertModel2Cuda(model,
                                         gpu_id=args.gpu_id,
                                         multiGpu=args.multiGpu)
    # Inference only — no gradients or dropout.
    model.eval()
    if args.eval is not None:
        # if os.path.isfile(args.resume):
        ckpt_filename = os.path.join(
            args.eval, 'checkpoint_{:04d}.pth.tar'.format(ckpt_idx))
        assert os.path.isfile(
            ckpt_filename), 'Error: no checkpoint directory found!'
        # map_location keeps the load CPU-side regardless of where the
        # checkpoint was saved.
        checkpoint = torch.load(ckpt_filename,
                                map_location=lambda storage, loc: storage)
        model.load_state_dict(checkpoint['state_dict'], strict=True)
        train_iou = checkpoint['IoU']
        print("=> loading checkpoint '{}', current iou: {:.04f}".format(
            ckpt_filename, train_iou))
    predict_results = {}       # video-name -> accumulated proposal array
    overlap = 0.6              # clip-window overlap ratio for SingleVideoLoader
    seq_length = 90            # frames per model input window
    sample_rate = [1, 2, 4]    # temporal subsampling rates; proposals from all rates are merged
    for s_sample_rate in sample_rate:
        for video_idx, s_target_filename in enumerate(target_file_names):
            # Skip videos whose feature file is missing on disk.
            if not os.path.exists(
                    os.path.join(
                        feature_directory, '{:s}.{:s}'.format(
                            s_target_filename, feature_file_ext))):
                print('{:s} Not found'.format(s_target_filename))
                continue
            s_feature_path = os.path.join(
                feature_directory,
                '{:s}.{:s}'.format(s_target_filename, feature_file_ext))
            singlevideo_data = SingleVideoLoader(feature_path=s_feature_path,
                                                 seq_length=seq_length,
                                                 overlap=overlap,
                                                 sample_rate=[s_sample_rate])
            n_video_len = singlevideo_data.n_features
            n_video_clips = len(singlevideo_data.video_clips)
            singlevideo_dataset = DataLoader(singlevideo_data,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=4)
            predict_proposals = []
            for batch_idx, data in enumerate(singlevideo_dataset):
                # data: (clip features, clip start frame, clip end frame).
                clip_feature = Variable(data[0], requires_grad=False)
                clip_start_positions = Variable(data[1], requires_grad=False)
                clip_end_positions = Variable(data[2], requires_grad=False)
                if use_cuda:
                    clip_feature = clip_feature.cuda()
                    clip_start_positions = clip_start_positions.cuda()
                    clip_end_positions = clip_end_positions.cuda()
                # One start/end offset per decoded proposal slot.
                clip_start_positions = clip_start_positions.repeat(
                    1, args.net_outputs)
                clip_end_positions = clip_end_positions.repeat(
                    1, args.net_outputs)
                head_pointer_probs, head_positions, tail_pointer_probs, tail_positions, cls_scores, _ = model(
                    clip_feature)
                cls_scores = F.softmax(cls_scores, dim=2)
                # Ensure head <= tail for each predicted segment.
                head_positions, tail_positions = helper.reorder(
                    head_positions, tail_positions)
                # Convert clip-local, subsampled indices back to absolute
                # frame numbers in the full video.
                head_positions = (head_positions * s_sample_rate +
                                  clip_start_positions)
                tail_positions = (tail_positions * s_sample_rate +
                                  clip_start_positions)
                # Keep only the positive-class probability as the score.
                cls_scores = cls_scores[:, :, 1].contiguous().view(-1)
                head_positions = head_positions.contiguous().view(-1)
                tail_positions = tail_positions.contiguous().view(-1)
                # Rows of (start-frame, end-frame, score).
                outputs = torch.stack([
                    head_positions.float(),
                    tail_positions.float(), cls_scores
                ], dim=-1)
                outputs = outputs.data.cpu().numpy()
                for output_idx, s_output in enumerate(outputs):
                    # Widen zero-length proposals by half a sample step on
                    # each side, clamped to the video extent.
                    if s_output[0] == s_output[1]:
                        s_output[0] -= s_sample_rate / 2
                        s_output[1] += s_sample_rate / 2
                        s_output[0] = max(0, s_output[0])
                        s_output[1] = min(n_video_len, s_output[1])
                        outputs[output_idx] = s_output
                predict_proposals.append(outputs)
            predict_proposals = np.concatenate(predict_proposals, axis=0)
            # overlap=0.999 removes only near-exact duplicates from
            # overlapping windows, not genuinely overlapping proposals.
            predict_proposals, _ = PropUtils.non_maxima_supression(
                predict_proposals, overlap=0.999)
            # sorted_idx = np.argsort(predict_proposals[:,-1])[::-1]
            # predict_proposals = predict_proposals[sorted_idx]
            # Accumulate proposals across the different sample rates.
            if s_target_filename in predict_results.keys():
                predict_results[s_target_filename] = np.concatenate(
                    (predict_results[s_target_filename], predict_proposals),
                    axis=0)
            else:
                predict_results[s_target_filename] = predict_proposals
            n_proposals = len(predict_proposals)
            print(
                "[{:d} | {:d}]{:s}\t {:d} Frames\t {:d} Clips\t{:d} Proposals @ rate:{:d}"
                .format(video_idx, len(target_file_names), s_target_filename,
                        n_video_len, n_video_clips, n_proposals,
                        s_sample_rate))
    # Flatten the per-video dict into rows and write the evaluation CSV.
    data_frame = pkl_frame2dataframe(predict_results, frm_nums)
    results = pd.DataFrame(
        data_frame,
        columns=['f-end', 'f-init', 'score', 'video-frames', 'video-name'])
    results.to_csv(os.path.join(user_home_directory, proposal_save_file),
                   sep=' ',
                   index=False)
def main():
    """Train the PointerNetwork proposal model on THUMOS14.

    Per epoch: one pass over the (augmented) train split with optimization,
    one evaluation pass over the val split, a checkpoint save, and an LR
    scheduler step driven by the validation total loss.

    The loss is ``alpha * (head CE + tail CE) + classification CE``, where the
    localization terms are computed only on proposal slots that were assigned
    a ground-truth segment (IoU >= 0.5 via ``h_assign.Assign_Batch``).

    Relies on module-level objects: ``parser``, ``cuda_model``, ``pp``,
    ``PointerNetwork``, ``THUMOST14``, ``h_assign``, ``Metrics``,
    ``AverageMeter``, ``save_checkpoint``, ``progressbar``.
    """
    global args
    args = (parser.parse_args())
    use_cuda = cuda_model.ifUseCuda(args.gpu_id, args.multiGpu)
    # Pretty print the run args
    pp.pprint(vars(args))
    model = PointerNetwork(input_dim=args.input_dim,
                           embedding_dim=args.embedding_dim,
                           hidden_dim=args.hidden_dim,
                           max_decoding_len=args.net_outputs,
                           dropout=0.5)
    print("Number of Params\t{:d}".format(
        sum([p.data.nelement() for p in model.parameters()])))
    save_directory = 'gru2heads_proposal_s4-3_cls_AUG_ckpt'
    if args.resume is not None:
        # ``args.resume`` is a filename template; the checkpoint index is
        # hard-coded here.
        ckpt_idx = 3
        ckpt_filename = args.resume.format(ckpt_idx)
        assert os.path.isfile(
            ckpt_filename), 'Error: no checkpoint directory found!'
        checkpoint = torch.load(ckpt_filename,
                                map_location=lambda storage, loc: storage)
        # strict=False: tolerate architecture differences (e.g. the added
        # dropout) between the checkpoint and this model.
        model.load_state_dict(checkpoint['state_dict'], strict=False)
        train_iou = checkpoint['IoU']
        args.start_epoch = checkpoint['epoch']
        print("=> loading checkpoint '{}', current iou: {:.04f}".format(
            ckpt_filename, train_iou))
    model = cuda_model.convertModel2Cuda(model,
                                         gpu_id=args.gpu_id,
                                         multiGpu=args.multiGpu)
    # rdDrop / rdOffset enable random-drop and random-offset augmentation.
    train_dataset = THUMOST14(seq_length=args.seq_len,
                              overlap=0.9,
                              sample_rate=[1, 2, 4],
                              dataset_split='train',
                              rdDrop=True,
                              rdOffset=True)
    val_dataset = THUMOST14(seq_length=args.seq_len,
                            overlap=0.9,
                            sample_rate=[1, 2, 4],
                            dataset_split='val',
                            rdDrop=True,
                            rdOffset=True)
    train_dataloader = DataLoader(train_dataset,
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  num_workers=4)
    val_dataloader = DataLoader(val_dataset,
                                batch_size=args.batch_size,
                                shuffle=False,
                                num_workers=4)
    model_optim = optim.Adam(filter(lambda p: p.requires_grad,
                                    model.parameters()),
                             lr=float(args.lr))
    optim_scheduler = optim.lr_scheduler.ReduceLROnPlateau(model_optim, 'min')
    alpha = 0.1  # weight of the localization (head/tail) loss vs. cls loss
    # Class weights for background vs. action; background (index 0) is
    # heavily down-weighted because most proposal slots are negatives.
    cls_weights = torch.FloatTensor([0.05, 1.0]).cuda()
    for epoch in range(args.start_epoch, args.nof_epoch + args.start_epoch):
        total_losses = AverageMeter()
        loc_losses = AverageMeter()
        cls_losses = AverageMeter()
        Accuracy = AverageMeter()
        IOU = AverageMeter()
        ordered_IOU = AverageMeter()
        model.train()
        pbar = progressbar.ProgressBar(max_value=len(train_dataloader))
        # ---------------- training pass ----------------
        for i_batch, sample_batch in enumerate(train_dataloader):
            pbar.update(i_batch)
            # sample_batch: (features, gt start idx, gt end idx, #valid gts).
            feature_batch = Variable(sample_batch[0])
            start_indices = Variable(sample_batch[1])
            end_indices = Variable(sample_batch[2])
            valid_indices = Variable(sample_batch[3])
            if use_cuda:
                feature_batch = feature_batch.cuda()
                start_indices = start_indices.cuda()
                end_indices = end_indices.cuda()
            gt_positions = torch.stack([start_indices, end_indices], dim=-1)
            head_pointer_probs, head_positions, tail_pointer_probs, tail_positions, cls_scores, _ = model(
                feature_batch)
            pred_positions = torch.stack([head_positions, tail_positions],
                                         dim=-1)
            # Match predicted slots to ground-truth segments (IoU >= 0.5);
            # assigned_scores is 0/1 per slot, assigned_locations the matched
            # gt (start, end) per slot.
            assigned_scores, assigned_locations = h_assign.Assign_Batch(
                gt_positions, pred_positions, valid_indices, thres=0.5)
            # if np.sum(assigned_scores) > 1:
            #     print("DEBUG")
            # correct_predictions = np.sum(assigned_scores[:,:args.n_outputs])
            # cls_rate = correct_predictions*1./np.sum(assigned_scores)
            if np.sum(assigned_scores) >= 1:
                # Average IoU over the positively-assigned slots only.
                iou_rate, effective_positives = Metrics.get_avg_iou2(
                    np.reshape(pred_positions.data.cpu().numpy(), (-1, 2)),
                    np.reshape(assigned_locations, (-1, 2)),
                    np.reshape(
                        assigned_scores,
                        assigned_scores.shape[0] * assigned_scores.shape[1]))
                IOU.update(iou_rate / (effective_positives),
                           effective_positives)
                # ordered_IOU.update(ordered_iou_rate/(args.batch_size*args.n_outputs),args.batch_size*args.n_outputs)
                # n_effective_batches += 1
            assigned_scores = Variable(torch.LongTensor(assigned_scores),
                                       requires_grad=False)
            assigned_locations = Variable(torch.LongTensor(assigned_locations),
                                          requires_grad=False)
            if use_cuda:
                assigned_scores = assigned_scores.cuda()
                assigned_locations = assigned_locations.cuda()
            # Flatten (batch, slots, ...) -> (batch*slots, ...) for CE.
            cls_scores = cls_scores.contiguous().view(-1,
                                                      cls_scores.size()[-1])
            assigned_scores = assigned_scores.contiguous().view(-1)
            cls_loss = F.cross_entropy(cls_scores,
                                       assigned_scores,
                                       weight=cls_weights)
            if torch.sum(assigned_scores) > 0:
                # At least one positive slot: compute localization losses on
                # the positive slots only.
                # print("HAHA")
                assigned_head_positions = assigned_locations[:, :, 0]
                assigned_head_positions = assigned_head_positions.contiguous(
                ).view(-1)
                #
                assigned_tail_positions = assigned_locations[:, :, 1]
                assigned_tail_positions = assigned_tail_positions.contiguous(
                ).view(-1)
                head_pointer_probs = head_pointer_probs.contiguous().view(
                    -1,
                    head_pointer_probs.size()[-1])
                tail_pointer_probs = tail_pointer_probs.contiguous().view(
                    -1,
                    tail_pointer_probs.size()[-1])
                # mask here: if there is non in assigned scores, no need to compute ...
                assigned_head_positions = torch.masked_select(
                    assigned_head_positions, assigned_scores.byte())
                assigned_tail_positions = torch.masked_select(
                    assigned_tail_positions, assigned_scores.byte())
                head_pointer_probs = torch.index_select(
                    head_pointer_probs,
                    dim=0,
                    index=assigned_scores.nonzero().squeeze(1))
                tail_pointer_probs = torch.index_select(
                    tail_pointer_probs,
                    dim=0,
                    index=assigned_scores.nonzero().squeeze(1))
                # Pointer position prediction treated as classification over
                # sequence positions.
                prediction_head_loss = F.cross_entropy(
                    head_pointer_probs, assigned_head_positions)
                prediction_tail_loss = F.cross_entropy(
                    tail_pointer_probs, assigned_tail_positions)
                loc_losses.update(
                    prediction_head_loss.data.item() +
                    prediction_tail_loss.data.item(), feature_batch.size(0))
                total_loss = alpha * (prediction_head_loss +
                                      prediction_tail_loss) + cls_loss
            else:
                # No positives in this batch: classification loss only.
                total_loss = cls_loss
            model_optim.zero_grad()
            total_loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.)
            model_optim.step()
            cls_losses.update(cls_loss.data.item(), feature_batch.size(0))
            total_losses.update(total_loss.item(), feature_batch.size(0))
        print(
            "Train -- Epoch :{:06d}, LR: {:.6f},\tloss={:.4f}, \t c-loss:{:.4f}, \tloc-loss:{:.4f}\tcls-Accuracy:{:.4f}\tloc-Avg-IOU:{:.4f}\t topIOU:{:.4f}"
            .format(epoch, model_optim.param_groups[0]['lr'],
                    total_losses.avg, cls_losses.avg, loc_losses.avg,
                    Accuracy.avg, IOU.avg, ordered_IOU.avg))
        # ---------------- validation pass ----------------
        model.eval()
        # Meters are reset so the scheduler below sees validation loss only.
        total_losses = AverageMeter()
        loc_losses = AverageMeter()
        cls_losses = AverageMeter()
        Accuracy = AverageMeter()
        IOU = AverageMeter()
        ordered_IOU = AverageMeter()
        pbar = progressbar.ProgressBar(max_value=len(val_dataloader))
        for i_batch, sample_batch in enumerate(val_dataloader):
            pbar.update(i_batch)
            feature_batch = Variable(sample_batch[0])
            start_indices = Variable(sample_batch[1])
            end_indices = Variable(sample_batch[2])
            valid_indices = Variable(sample_batch[3])
            if use_cuda:
                feature_batch = feature_batch.cuda()
                start_indices = start_indices.cuda()
                end_indices = end_indices.cuda()
            gt_positions = torch.stack([start_indices, end_indices], dim=-1)
            head_pointer_probs, head_positions, tail_pointer_probs, tail_positions, cls_scores, _ = model(
                feature_batch)
            pred_positions = torch.stack([head_positions, tail_positions],
                                         dim=-1)
            assigned_scores, assigned_locations = h_assign.Assign_Batch(
                gt_positions, pred_positions, valid_indices, thres=0.5)
            # if np.sum(assigned_scores) > 1:
            #     print("DEBUG")
            # correct_predictions = np.sum(assigned_scores[:,:args.n_outputs])
            # cls_rate = correct_predictions*1./np.sum(assigned_scores)
            if np.sum(assigned_scores) >= 1:
                iou_rate, effective_positives = Metrics.get_avg_iou2(
                    np.reshape(pred_positions.data.cpu().numpy(), (-1, 2)),
                    np.reshape(assigned_locations, (-1, 2)),
                    np.reshape(
                        assigned_scores,
                        assigned_scores.shape[0] * assigned_scores.shape[1]))
                IOU.update(iou_rate / (effective_positives),
                           effective_positives)
            assigned_scores = Variable(torch.LongTensor(assigned_scores),
                                       requires_grad=False)
            assigned_locations = Variable(torch.LongTensor(assigned_locations),
                                          requires_grad=False)
            if use_cuda:
                assigned_scores = assigned_scores.cuda()
                assigned_locations = assigned_locations.cuda()
            cls_scores = cls_scores.contiguous().view(-1,
                                                      cls_scores.size()[-1])
            assigned_scores = assigned_scores.contiguous().view(-1)
            cls_loss = F.cross_entropy(cls_scores,
                                       assigned_scores,
                                       weight=cls_weights)
            if torch.sum(assigned_scores) > 0:
                # Same masked localization loss as in training; no optimizer
                # step is taken here.
                # print("HAHA")
                assigned_head_positions = assigned_locations[:, :, 0]
                assigned_head_positions = assigned_head_positions.contiguous(
                ).view(-1)
                #
                assigned_tail_positions = assigned_locations[:, :, 1]
                assigned_tail_positions = assigned_tail_positions.contiguous(
                ).view(-1)
                head_pointer_probs = head_pointer_probs.contiguous().view(
                    -1,
                    head_pointer_probs.size()[-1])
                tail_pointer_probs = tail_pointer_probs.contiguous().view(
                    -1,
                    tail_pointer_probs.size()[-1])
                # mask here: if there is non in assigned scores, no need to compute ...
                assigned_head_positions = torch.masked_select(
                    assigned_head_positions, assigned_scores.byte())
                assigned_tail_positions = torch.masked_select(
                    assigned_tail_positions, assigned_scores.byte())
                head_pointer_probs = torch.index_select(
                    head_pointer_probs,
                    dim=0,
                    index=assigned_scores.nonzero().squeeze(1))
                tail_pointer_probs = torch.index_select(
                    tail_pointer_probs,
                    dim=0,
                    index=assigned_scores.nonzero().squeeze(1))
                prediction_head_loss = F.cross_entropy(
                    head_pointer_probs, assigned_head_positions)
                prediction_tail_loss = F.cross_entropy(
                    tail_pointer_probs, assigned_tail_positions)
                loc_losses.update(
                    prediction_head_loss.data.item() +
                    prediction_tail_loss.data.item(), feature_batch.size(0))
                total_loss = alpha * (prediction_head_loss +
                                      prediction_tail_loss) + cls_loss
            else:
                total_loss = cls_loss
            cls_losses.update(cls_loss.data.item(), feature_batch.size(0))
            total_losses.update(total_loss.item(), feature_batch.size(0))
        print(
            "Val -- Epoch :{:06d}, LR: {:.6f},\tloss={:.4f}, \t c-loss:{:.4f}, \tloc-loss:{:.4f}\tcls-Accuracy:{:.4f}\tloc-Avg-IOU:{:.4f}\t topIOU:{:.4f}"
            .format(epoch, model_optim.param_groups[0]['lr'],
                    total_losses.avg, cls_losses.avg, loc_losses.avg,
                    Accuracy.avg, IOU.avg, ordered_IOU.avg))
        # Save every epoch (the % 1 guard is a no-op kept for easy tuning).
        if epoch % 1 == 0:
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'loss': total_losses.avg,
                    'cls_loss': cls_losses.avg,
                    'loc_loss': loc_losses.avg,
                    'IoU': IOU.avg
                }, (epoch + 1),
                file_direcotry=save_directory)
        # Scheduler keys off the *validation* total loss (meters were reset
        # before the validation pass).
        optim_scheduler.step(total_losses.avg)
def main():
    """Debug/inspection pass of a trained PointerNetwork on the val split.

    Loads a checkpoint, runs the model in eval mode over the validation data,
    prints ground-truth vs. predicted segment positions for batches that
    contain valid ground truth, computes (but does not optimize) the losses,
    and stops after a single epoch. All optimizer steps are intentionally
    commented out.

    NOTE(review): this function uses Python 2 print statements
    (``print "..."``) and ``h_match.Assign_Batch`` (the training ``main`` uses
    ``h_assign``) — presumably a variant matcher under test; confirm.
    """
    global args
    args = (parser.parse_args())
    use_cuda = cuda_model.ifUseCuda(args.gpu_id, args.multiGpu)
    pp.pprint(vars(args))
    model = PointerNetwork(input_dim=args.input_dim,
                           embedding_dim=args.embedding_dim,
                           hidden_dim=args.hidden_dim,
                           max_decoding_len=args.net_outputs,
                           dropout=0.5)
    print("Number of Params\t{:d}".format(
        sum([p.data.nelement() for p in model.parameters()])))
    #
    save_directory = 'gru2heads_proposal_s4-2_ckpt'
    if args.resume is not None:
        # Hard-coded checkpoint index applied to the resume filename template.
        ckpt_idx = 2
        ckpt_filename = args.resume.format(ckpt_idx)
        assert os.path.isfile(
            ckpt_filename), 'Error: no checkpoint directory found!'
        checkpoint = torch.load(ckpt_filename,
                                map_location=lambda storage, loc: storage)
        model.load_state_dict(checkpoint['state_dict'], strict=True)
        args.start_epoch = checkpoint['epoch']
        train_iou = checkpoint['IoU']
        train_tloss = checkpoint['loss']
        train_cls_loss = checkpoint['cls_loss']
        train_loc_loss = checkpoint['loc_loss']
        print(
            "=> loading checkpoint '{}', total loss: {:.04f},\t cls_loss: {:.04f},\t loc_loss: {:.04f},"
            " \tcurrent iou: {:.04f}".format(ckpt_filename, train_tloss,
                                             train_cls_loss, train_loc_loss,
                                             train_iou))
    model = cuda_model.convertModel2Cuda(model,
                                        gpu_id=args.gpu_id,
                                        multiGpu=args.multiGpu)
    # Despite the name, this loads the *val* split for inspection.
    train_dataset = THUMOST14(seq_length=args.seq_len,
                              overlap=0.9,
                              sample_rate=[1, 2, 4],
                              dataset_split='val')
    train_dataloader = DataLoader(train_dataset,
                                  batch_size=args.batch_size,
                                  shuffle=False,
                                  num_workers=4)
    # Optimizer/scheduler are built but never stepped (debug run).
    model_optim = optim.Adam(filter(lambda p: p.requires_grad,
                                    model.parameters()),
                             lr=float(args.lr))
    optim_scheduler = optim.lr_scheduler.ReduceLROnPlateau(model_optim, 'min')
    alpha = 0.1  # weight of the localization loss vs. the cls loss
    for epoch in range(args.start_epoch, args.nof_epoch + args.start_epoch):
        total_losses = AverageMeter()
        loc_losses = AverageMeter()
        cls_losses = AverageMeter()
        Accuracy = AverageMeter()
        IOU = AverageMeter()
        ordered_IOU = AverageMeter()
        #Update here!
        model.eval()
        pbar = progressbar.ProgressBar(max_value=len(train_dataloader))
        for i_batch, sample_batch in enumerate(train_dataloader):
            # pbar.update(i_batch)
            feature_batch = Variable(sample_batch[0])
            start_indices = Variable(sample_batch[1])
            end_indices = Variable(sample_batch[2])
            valid_indices = Variable(sample_batch[3])
            if use_cuda:
                feature_batch = feature_batch.cuda()
                start_indices = start_indices.cuda()
                end_indices = end_indices.cuda()
            #
            gt_positions = torch.stack([start_indices, end_indices], dim=-1)
            head_pointer_probs, head_positions, tail_pointer_probs, tail_positions, cls_scores, _ = model(
                feature_batch)
            pred_positions = torch.stack([head_positions, tail_positions],
                                         dim=-1)
            assigned_scores, assigned_locations = h_match.Assign_Batch(
                gt_positions, pred_positions, valid_indices, thres=0.5)
            # Dump GT vs. predictions for the first sample in the batch that
            # has at least one valid ground-truth segment.
            if valid_indices.byte().any() > 0:
                print "Output at {:d}".format(i_batch)
                # n_valid = valid_indices.data[0, 0]
                view_idx = valid_indices.nonzero()[0][0].item()
                n_valid = valid_indices[view_idx, 0].item()
                print "GT:"
                print(gt_positions[view_idx, :n_valid, :])
                print("Pred")
                print(pred_positions[view_idx])
                # Sorted pointer probabilities; computed for interactive
                # inspection, currently unused.
                _, head_sort = head_pointer_probs[view_idx, 0, :].sort()
                _, tail_sort = tail_pointer_probs[view_idx, 0, :].sort()
                print("END of {:d}".format(i_batch))
            #
            # Unlike the training script, IoU is computed unconditionally
            # here (no positives-present guard).
            iou_rate, effective_positives = Metrics.get_avg_iou2(
                np.reshape(pred_positions.data.cpu().numpy(), (-1, 2)),
                np.reshape(assigned_locations, (-1, 2)),
                np.reshape(
                    assigned_scores,
                    assigned_scores.shape[0] * assigned_scores.shape[1]))
            IOU.update(iou_rate / (effective_positives), effective_positives)
            assigned_scores = Variable(torch.LongTensor(assigned_scores),
                                       requires_grad=False)
            assigned_locations = Variable(
                torch.LongTensor(assigned_locations), requires_grad=False)
            if use_cuda:
                assigned_scores = assigned_scores.cuda()
                assigned_locations = assigned_locations.cuda()
            cls_scores = cls_scores.contiguous().view(
                -1, cls_scores.size()[-1])
            assigned_scores = assigned_scores.contiguous().view(-1)
            # No class weights here (training uses weighted CE).
            cls_loss = F.cross_entropy(cls_scores, assigned_scores)
            assigned_head_positions = assigned_locations[:, :, 0]
            assigned_head_positions = assigned_head_positions.contiguous(
            ).view(-1)
            #
            assigned_tail_positions = assigned_locations[:, :, 1]
            assigned_tail_positions = assigned_tail_positions.contiguous(
            ).view(-1)
            head_pointer_probs = head_pointer_probs.contiguous().view(
                -1,
                head_pointer_probs.size()[-1])
            tail_pointer_probs = tail_pointer_probs.contiguous().view(
                -1,
                tail_pointer_probs.size()[-1])
            # start_indices = start_indices.contiguous().view(-1)
            # end_indices = end_indices.contiguous().view(-1)
            # with case instances....
            # Per-element CE (reduce=False) multiplied by the 0/1 assignment
            # mask, so negatives contribute zero localization loss.
            prediction_head_loss = F.cross_entropy((head_pointer_probs),
                                                   assigned_head_positions,
                                                   reduce=False)
            prediction_head_loss = torch.mean(prediction_head_loss *
                                              assigned_scores.float())
            prediction_tail_loss = F.cross_entropy((tail_pointer_probs),
                                                   assigned_tail_positions,
                                                   reduce=False)
            prediction_tail_loss = torch.mean(prediction_tail_loss *
                                              assigned_scores.float())
            total_loss = alpha * (prediction_head_loss +
                                  prediction_tail_loss) + cls_loss
            # Optimization deliberately disabled for this debug pass.
            # # # model_optim.zero_grad()
            # # total_loss.backward()
            # # torch.nn.utils.clip_grad_norm(model.parameters(), 1.)
            # # model_optim.step()
            cls_losses.update(cls_loss.data.item(), feature_batch.size(0))
            loc_losses.update(
                prediction_head_loss.data.item() +
                prediction_tail_loss.data.item(), feature_batch.size(0))
            total_losses.update(total_loss.data.item(),
                                feature_batch.size(0))
        # #
        print(
            "Train -- Epoch :{:06d}, LR: {:.6f},\tloss={:.4f}, \t c-loss:{:.4f}, \tloc-loss:{:.4f}\tcls-Accuracy:{:.4f}\tloc-Avg-IOU:{:.4f}\t topIOU:{:.4f}"
            .format(epoch, model_optim.param_groups[0]['lr'],
                    total_losses.avg, cls_losses.avg, loc_losses.avg,
                    Accuracy.avg, IOU.avg, ordered_IOU.avg))
        # Debug run: stop after a single epoch.
        break