def predict(self, sess, input_data, verbose=None):
    """
    Runs inference over input_data and collects predictions.

    Args:
        sess: tf.Session() object
        input_data: tuple of (encode_input, decode_input, decode_label)
    Returns:
        preds: list of predictions from the final processing step.
        true_label: list of ground-truth labels.
        lengths: list of decoder sentence counts per example.
    """
    preds = []
    true_label = []
    lengths = []
    for _, (b_data, b_order) in enumerate(
            helper.data_iter(input_data, self.config.batch_size)):
        order_indices = b_order
        pred = None
        ret_label = None
        sent_num_dec = None
        # Iteratively refine the predicted order for `processing_step` passes.
        for i in range(self.config.processing_step):
            (ret_batch, ret_label, sent_num_enc, sent_num_dec,
             sent_len) = helper.shuffleData(b_data, order_indices, self.vocab)
            feed_dict = self.create_feed_dict(ret_batch, sent_len, sent_num_enc,
                                              ret_label, sent_num_dec)
            pred = sess.run(self.prediction, feed_dict=feed_dict)
            pred = pred.tolist()
            order_indices = helper.reorder(order_indices, pred, sent_num_dec)
        # Keep only the final pass's predictions and labels.
        preds += pred
        true_label += ret_label.tolist()
        lengths += sent_num_dec
    return preds, true_label, lengths
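# Minimal usage sketch for `predict` (not part of the class). It assumes a trained
# model instance, an open `sess`, and a `dev_data` tuple already exist, and that each
# accumulated prediction aligns one-to-one with its label, as the accumulation above
# implies.
def example_report_accuracy(model, sess, dev_data):
    preds, true_label, lengths = model.predict(sess, dev_data)
    n_correct = sum(1 for p, t in zip(preds, true_label) if p == t)
    print('exactly matched predictions: {} / {}'.format(n_correct, len(preds)))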
def run_epoch(self, sess, input_data, verbose=None):
    """
    Trains the model for one epoch.

    Args:
        sess: tf.Session() object
        input_data: tuple of (encode_input, decode_input, decode_label)
    Returns:
        avg_loss: scalar. Average minibatch loss of model on epoch.
    """
    data_len = len(input_data)
    total_steps = data_len // self.config.batch_size
    total_loss = []
    for step, (b_data, b_order) in enumerate(
            helper.data_iter(input_data, self.config.batch_size)):
        order_indices = b_order
        losses = []
        for i in range(self.config.processing_step):
            (ret_batch, ret_label, sent_num_enc, sent_num_dec,
             sent_len) = helper.shuffleData(b_data, order_indices, self.vocab)
            feed_dict = self.create_feed_dict(ret_batch, sent_len, sent_num_enc,
                                              ret_label, sent_num_dec)
            _, loss, lr, pred = sess.run(
                [self.train_op, self.loss, self.learning_rate, self.prediction],
                feed_dict=feed_dict)
            pred = pred.tolist()
            order_indices = helper.reorder(order_indices, pred, sent_num_dec)
            losses.append(loss)
        # One loss value per minibatch: the mean over the processing steps.
        total_loss.append(np.mean(losses))
        if verbose and step % verbose == 0:
            sys.stdout.write('\r{} / {} : loss = {}, lr = {}'.format(
                step, total_steps, np.mean(total_loss[-verbose:]), lr))
            sys.stdout.flush()
    sys.stdout.write('\n')
    avg_loss = np.mean(total_loss)
    return avg_loss
def fit(self, sess, input_data, verbose=None):
    """
    Runs one epoch of validation or testing and returns the loss.

    Args:
        sess: tf.Session() object
        input_data: tuple of (encode_input, decode_input, decode_label)
    Returns:
        avg_loss: scalar. Average minibatch loss of model on epoch.
    """
    total_loss = []
    for step, (b_data, b_order) in enumerate(
            helper.data_iter(input_data, self.config.batch_size)):
        order_indices = b_order
        losses = []
        for i in range(self.config.processing_step):
            (ret_batch, ret_label, sent_num_enc, sent_num_dec,
             sent_len) = helper.shuffleData(b_data, order_indices, self.vocab)
            feed_dict = self.create_feed_dict(ret_batch, sent_len, sent_num_enc,
                                              ret_label, sent_num_dec)
            # No train_op here: weights are not updated during evaluation.
            loss, pred = sess.run([self.loss, self.prediction],
                                  feed_dict=feed_dict)
            pred = pred.tolist()
            order_indices = helper.reorder(order_indices, pred, sent_num_dec)
            losses.append(loss)
        total_loss.append(np.mean(losses))
    avg_loss = np.mean(total_loss)
    return avg_loss
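# A hedged driver sketch showing how run_epoch / fit are typically wired together.
# `Config`, `Model`, `load_train_data`, and `load_dev_data` are hypothetical names
# used only for illustration; the sketch also assumes the usual TF1-style
# `import tensorflow as tf` that the methods above rely on.
def example_train_loop():
    config = Config()
    model = Model(config)
    train_data = load_train_data()
    dev_data = load_dev_data()
    best_dev_loss = float('inf')
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(config.max_epochs):
            train_loss = model.run_epoch(sess, train_data, verbose=10)
            dev_loss = model.fit(sess, dev_data)
            print('epoch {}: train loss {:.4f}, dev loss {:.4f}'.format(
                epoch, train_loss, dev_loss))
            if dev_loss < best_dev_loss:
                best_dev_loss = dev_loss
                # save a checkpoint here, e.g. with tf.train.Saver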
def main():
    global args
    args = parser.parse_args()
    ckpt_idx = args.fileid
    proposal_save_file = 'Dev/NetModules/ActionLocalizationDevs/PropEval/baselines_results/inception-s4-EMD-gru-aug-{:04d}_thumos14_test.csv'.format(
        ckpt_idx)
    feature_directory = os.path.join(user_home_directory,
                                     'datasets/THUMOS14/features/BNInception')
    # Note: os.path.join returns the second argument unchanged when it is an
    # absolute path, so user_home_directory is effectively ignored here.
    ground_truth_file = os.path.join(
        user_home_directory,
        '/home/zwei/Dev/NetModules/ActionLocalizationDevs/action_det_prep/thumos14_tag_test_proposal_list.csv'
    )
    ground_truth = pd.read_csv(ground_truth_file, sep=' ')
    target_video_frms = ground_truth[['video-name',
                                      'video-frames']].drop_duplicates().values
    # Map video name -> total frame count for the final CSV.
    frm_nums = {}
    for s_target_videofrms in target_video_frms:
        frm_nums[s_target_videofrms[0]] = s_target_videofrms[1]
    target_file_names = ground_truth['video-name'].unique()
    feature_file_ext = 'npy'

    use_cuda = cuda_model.ifUseCuda(args.gpu_id, args.multiGpu)
    # Pretty print the run args
    pp.pprint(vars(args))

    model = PointerNetwork(input_dim=args.input_dim,
                           embedding_dim=args.embedding_dim,
                           hidden_dim=args.hidden_dim,
                           max_decoding_len=args.net_outputs)
    print("Number of Params\t{:d}".format(
        sum([p.data.nelement() for p in model.parameters()])))
    model = cuda_model.convertModel2Cuda(model,
                                         gpu_id=args.gpu_id,
                                         multiGpu=args.multiGpu)
    model.eval()

    if args.eval is not None:
        # if os.path.isfile(args.resume):
        ckpt_filename = os.path.join(
            args.eval, 'checkpoint_{:04d}.pth.tar'.format(ckpt_idx))
        assert os.path.isfile(
            ckpt_filename), 'Error: no checkpoint directory found!'
        checkpoint = torch.load(ckpt_filename,
                                map_location=lambda storage, loc: storage)
        model.load_state_dict(checkpoint['state_dict'], strict=True)
        train_iou = checkpoint['IoU']
        print("=> loading checkpoint '{}', current iou: {:.04f}".format(
            ckpt_filename, train_iou))

    predict_results = {}
    overlap = 0.6
    seq_length = 90
    sample_rate = [1, 2, 4]
    for s_sample_rate in sample_rate:
        for video_idx, s_target_filename in enumerate(target_file_names):
            if not os.path.exists(
                    os.path.join(
                        feature_directory,
                        '{:s}.{:s}'.format(s_target_filename,
                                           feature_file_ext))):
                print('{:s} Not found'.format(s_target_filename))
                continue
            s_feature_path = os.path.join(
                feature_directory,
                '{:s}.{:s}'.format(s_target_filename, feature_file_ext))
            singlevideo_data = SingleVideoLoader(feature_path=s_feature_path,
                                                 seq_length=seq_length,
                                                 overlap=overlap,
                                                 sample_rate=[s_sample_rate])
            n_video_len = singlevideo_data.n_features
            n_video_clips = len(singlevideo_data.video_clips)
            singlevideo_dataset = DataLoader(singlevideo_data,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=4)
            predict_proposals = []
            for batch_idx, data in enumerate(singlevideo_dataset):
                clip_feature = Variable(data[0], requires_grad=False)
                clip_start_positions = Variable(data[1], requires_grad=False)
                clip_end_positions = Variable(data[2], requires_grad=False)
                if use_cuda:
                    clip_feature = clip_feature.cuda()
                    clip_start_positions = clip_start_positions.cuda()
                    clip_end_positions = clip_end_positions.cuda()

                clip_start_positions = clip_start_positions.repeat(
                    1, args.net_outputs)
                clip_end_positions = clip_end_positions.repeat(
                    1, args.net_outputs)

                (head_pointer_probs, head_positions, tail_pointer_probs,
                 tail_positions, cls_scores, _) = model(clip_feature)

                cls_scores = F.softmax(cls_scores, dim=2)
                head_positions, tail_positions = helper.reorder(
                    head_positions, tail_positions)

                # Map clip-local indices back to frame indices in the full video.
                head_positions = (head_positions * s_sample_rate +
                                  clip_start_positions)
                tail_positions = (tail_positions * s_sample_rate +
                                  clip_start_positions)

                cls_scores = cls_scores[:, :, 1].contiguous().view(-1)
                head_positions = head_positions.contiguous().view(-1)
                tail_positions = tail_positions.contiguous().view(-1)

                outputs = torch.stack(
                    [head_positions.float(),
                     tail_positions.float(), cls_scores],
                    dim=-1)
                outputs = outputs.data.cpu().numpy()

                # Widen zero-length proposals by half a sample step on each side
                # and clamp to the video boundaries.
                for output_idx, s_output in enumerate(outputs):
                    if s_output[0] == s_output[1]:
                        s_output[0] -= s_sample_rate / 2
                        s_output[1] += s_sample_rate / 2
                    s_output[0] = max(0, s_output[0])
                    s_output[1] = min(n_video_len, s_output[1])
                    outputs[output_idx] = s_output
                predict_proposals.append(outputs)

            predict_proposals = np.concatenate(predict_proposals, axis=0)
            predict_proposals, _ = PropUtils.non_maxima_supression(
                predict_proposals, overlap=0.999)
            # sorted_idx = np.argsort(predict_proposals[:,-1])[::-1]
            # predict_proposals = predict_proposals[sorted_idx]
            if s_target_filename in predict_results.keys():
                predict_results[s_target_filename] = np.concatenate(
                    (predict_results[s_target_filename], predict_proposals),
                    axis=0)
            else:
                predict_results[s_target_filename] = predict_proposals
            n_proposals = len(predict_proposals)
            print(
                "[{:d} | {:d}]{:s}\t {:d} Frames\t {:d} Clips\t{:d} Proposals @ rate:{:d}"
                .format(video_idx, len(target_file_names), s_target_filename,
                        n_video_len, n_video_clips, n_proposals,
                        s_sample_rate))

    data_frame = pkl_frame2dataframe(predict_results, frm_nums)
    results = pd.DataFrame(
        data_frame,
        columns=['f-end', 'f-init', 'score', 'video-frames', 'video-name'])
    results.to_csv(os.path.join(user_home_directory, proposal_save_file),
                   sep=' ',
                   index=False)
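# `PropUtils.non_maxima_supression` is project code; the sketch below is only an
# assumed, minimal interval NMS over rows of [start, end, score], included to
# illustrate what the call above is expected to do. It is not the project's
# implementation.
def example_interval_nms(proposals, overlap=0.999):
    # proposals: np.ndarray of shape (N, 3) with columns [start, end, score]
    order = np.argsort(proposals[:, 2])[::-1]  # highest score first
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        rest = order[1:]
        # Temporal intersection-over-union between the kept proposal and the rest.
        inter = (np.minimum(proposals[i, 1], proposals[rest, 1]) -
                 np.maximum(proposals[i, 0], proposals[rest, 0])).clip(min=0)
        union = ((proposals[i, 1] - proposals[i, 0]) +
                 (proposals[rest, 1] - proposals[rest, 0]) - inter)
        iou = inter / np.maximum(union, 1e-8)
        order = rest[iou <= overlap]
    return proposals[keep], keep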