def eval_save(self):
    """Evaluate the model on the test set and dump per-sample results.

    For every test sample this writes a line to ``./our.txt`` with the
    sample's IoU, predicted start/end frames, and ground-truth boundaries,
    saves the last batch's attention maps to ``a1.npy``/``a2.npy``, and
    prints averaged loss / mIoU / IoU@{0.1,0.3,0.5,0.7,0.9} metrics.

    Side effects: writes ./our.txt, a1.npy, a2.npy; logs timing; prints
    metric summary to stdout. Returns None.
    """
    data_loaders = [self.test_loader]
    meters = collections.defaultdict(lambda: AverageMeter())
    time_meter = TimeMeter()
    self.model.eval()
    # Use context managers so the results file is closed even on error.
    # NOTE: the original opened the file without ever closing it.
    with open('./our.txt', 'w') as f, torch.no_grad():
        for data_loader in data_loaders:
            for bid, (video, video_mask, words, word_mask, label, scores,
                      scores_mask, id2pos, node_mask, adj_mat) in enumerate(data_loader, 1):
                # (Removed a stray `self.optimizer.zero_grad()` here: no
                # gradients are produced under torch.no_grad(), so it was a
                # no-op in evaluation.)
                model_input = {
                    'frames': video.cuda(),
                    'frame_mask': video_mask.cuda(),
                    'words': words.cuda(),
                    'word_mask': word_mask.cuda(),
                    'label': scores.cuda(),        # anchor score targets
                    'label_mask': scores_mask.cuda(),
                    'gt': label.cuda(),            # ground-truth [start, end]
                    'node_pos': id2pos.cuda(),
                    'node_mask': node_mask.cuda(),
                    'adj_mat': adj_mat.cuda(),
                }
                predict_boxes, loss, _, a1, a2 = self.model(**model_input)
                loss = torch.mean(loss)
                time_meter.update()
                if bid % self.args.display_n_batches == 0:
                    logging.info('%.3f seconds/batch' % (1.0 / time_meter.avg))
                meters['loss'].update(loss.item())
                # Attention maps of the *last processed batch* end up on disk
                # (each iteration overwrites the previous files).
                a1, a2 = a1.cpu().numpy(), a2.cpu().numpy()
                np.save('a1.npy', a1)
                np.save('a2.npy', a2)
                video_mask = video_mask.cpu().numpy()
                gt_boxes = model_input['gt'].cpu().numpy()
                predict_boxes = np.round(predict_boxes.cpu().numpy()).astype(np.int32)
                gt_starts, gt_ends = gt_boxes[:, 0], gt_boxes[:, 1]
                predict_starts, predict_ends = predict_boxes[:, 0], predict_boxes[:, 1]
                # Clamp predictions into the valid frame range of each video.
                predict_starts[predict_starts < 0] = 0
                seq_len = np.sum(video_mask, -1)
                predict_ends[predict_ends >= seq_len] = seq_len[predict_ends >= seq_len] - 1
                IoUs = criteria.calculate_IoU_batch((predict_starts, predict_ends),
                                                    (gt_starts, gt_ends))
                for kk in range(predict_starts.shape[0]):
                    f.write('IoU: ' + str(IoUs[kk]) + ' start: ' + str(predict_starts[kk]) +
                            ' ends: ' + str(predict_ends[kk]) + ' gt: ' +
                            str(gt_starts[kk]) + ' ' + str(gt_ends[kk]) + '\n')
                meters['mIoU'].update(np.mean(IoUs), IoUs.shape[0])
                # Recall at IoU thresholds 0.1, 0.3, 0.5, 0.7, 0.9.
                for i in range(1, 10, 2):
                    meters['IoU@0.%d' % i].update(np.mean(IoUs >= (i / 10)), IoUs.shape[0])
            # Only the test loader is evaluated here; the original printed
            # '--------val' for it, which mislabeled the results.
            print('--------test')
            print('| ', end='')
            for key, value in meters.items():
                print('{}, {:.4f}'.format(key, value.avg), end=' | ')
                value.reset()
            print()
def eval(self):
    """Evaluate the model on the validation and test sets.

    Runs one pass over ``self.val_loader`` then ``self.test_loader`` under
    ``torch.no_grad()``, accumulating loss, mIoU, and IoU@{0.1,...,0.9}
    recall in AverageMeters, and prints one summary line per data loader
    (meters are reset between loaders). Returns None.
    """
    data_loaders = [self.val_loader, self.test_loader]
    meters = collections.defaultdict(lambda: AverageMeter())
    self.model.eval()
    with torch.no_grad():
        for data_loader in data_loaders:
            for bid, (video, video_mask, words, word_mask, label, scores,
                      scores_mask, id2pos, node_mask, adj_mat) in enumerate(data_loader, 1):
                # (Removed a stray `self.optimizer.zero_grad()` here: no
                # gradients exist under torch.no_grad(), so it was a no-op.)
                model_input = {
                    'frames': video.cuda(),
                    'frame_mask': video_mask.cuda(),
                    'words': words.cuda(),
                    'word_mask': word_mask.cuda(),
                    'label': scores.cuda(),        # anchor score targets
                    'label_mask': scores_mask.cuda(),
                    'gt': label.cuda(),            # ground-truth [start, end]
                    'node_pos': id2pos.cuda(),
                    'node_mask': node_mask.cuda(),
                    'adj_mat': adj_mat.cuda(),
                }
                predict_boxes, loss, _, _, _ = self.model(**model_input)
                loss = torch.mean(loss)
                meters['loss'].update(loss.item())
                video_mask = video_mask.cpu().numpy()
                gt_boxes = model_input['gt'].cpu().numpy()
                predict_boxes = np.round(predict_boxes.cpu().numpy()).astype(np.int32)
                gt_starts, gt_ends = gt_boxes[:, 0], gt_boxes[:, 1]
                predict_starts, predict_ends = predict_boxes[:, 0], predict_boxes[:, 1]
                # Clamp predictions into the valid frame range of each video.
                predict_starts[predict_starts < 0] = 0
                seq_len = np.sum(video_mask, -1)
                predict_ends[predict_ends >= seq_len] = seq_len[predict_ends >= seq_len] - 1
                IoUs = criteria.calculate_IoU_batch((predict_starts, predict_ends),
                                                    (gt_starts, gt_ends))
                meters['mIoU'].update(np.mean(IoUs), IoUs.shape[0])
                # Recall at IoU thresholds 0.1, 0.3, 0.5, 0.7, 0.9.
                for i in range(1, 10, 2):
                    meters['IoU@0.%d' % i].update(np.mean(IoUs >= (i / 10)), IoUs.shape[0])
            # Per-loader summary; reset meters so the next loader starts fresh.
            print('| ', end='')
            for key, value in meters.items():
                print('{}, {:.4f}'.format(key, value.avg), end=' | ')
                value.reset()
            print()
def __getitem__(self, index):
    """Return one padded/sampled training example.

    Fetches the raw sample from the parent dataset, then:
      1. pads/truncates word features to ``self.max_num_words``;
      2. temporally downsamples the video to ``self.max_num_frames`` by
         averaging frames between linspace anchors;
      3. remaps the ground-truth [start, end] label into the downsampled
         frame index space;
      4. drops graph nodes whose word position was truncated and pads the
         node list / adjacency matrix to ``self.max_num_nodes``;
      5. computes normalized anchor IoU scores against the label.

    Returns (video, video_mask, words, word_mask, label, scores,
    scores_mask, id2pos, node_mask, adj_mat), all numpy arrays.
    """
    video, words, label, id2pos, adj_mat = super().__getitem__(index)
    ori_words_len = words.shape[0]
    # --- word padding / truncation ---
    if ori_words_len < self.max_num_words:
        word_mask = np.zeros([self.max_num_words], np.uint8)
        word_mask[range(ori_words_len)] = 1
        words = np.pad(words, ((0, self.max_num_words - ori_words_len), (0, 0)),
                       mode='constant')
    else:
        word_mask = np.ones([self.max_num_words], np.uint8)
        words = words[:self.max_num_words]
    # --- video temporal sampling to a fixed length ---
    ori_video_len = video.shape[0]
    video_mask = np.ones([self.max_num_frames], np.uint8)
    index = np.linspace(start=0, stop=ori_video_len - 1,
                        num=self.max_num_frames).astype(np.int32)
    new_video = []
    for i in range(len(index) - 1):
        start = index[i]
        end = index[i + 1]
        if start == end or start + 1 == end:
            # Segment covers at most one frame: take it directly.
            new_video.append(video[start])
        else:
            # Average all frames falling inside the segment.
            new_video.append(np.mean(video[start:end], 0))
    new_video.append(video[-1])
    video = np.stack(new_video, 0)
    # --- remap the ground-truth span into downsampled indices ---
    label[0] = min(np.where(index >= label[0])[0])
    if label[1] == ori_video_len - 1:
        label[1] = self.max_num_frames - 1
    else:
        label[1] = max(np.where(index <= label[1])[0])
    if label[1] < label[0]:
        # Degenerate span after remapping: collapse to a single frame.
        label[0] = label[1]
    assert len(id2pos) == adj_mat.shape[0] == ori_words_len
    # --- drop graph nodes whose words were truncated above ---
    true_index = id2pos < self.max_num_words
    id2pos = id2pos[true_index]
    adj_mat = adj_mat[true_index]
    adj_mat = adj_mat[:, true_index]
    # --- node padding / truncation ---
    if id2pos.shape[0] < self.max_num_nodes:
        node_mask = np.zeros([self.max_num_nodes], np.uint8)
        node_mask[range(id2pos.shape[0])] = 1
        id2pos = np.pad(id2pos, (0, self.max_num_nodes - id2pos.shape[0]),
                        mode='constant')
        adj_mat = np.pad(adj_mat,
                         ((0, self.max_num_nodes - adj_mat.shape[0]),
                          (0, self.max_num_nodes - adj_mat.shape[1])),
                         mode='constant')
    else:
        node_mask = np.ones([self.max_num_nodes], np.uint8)
        id2pos = id2pos[:self.max_num_nodes]
        adj_mat = adj_mat[:self.max_num_nodes, :self.max_num_nodes]
    # --- anchor score computation ---
    proposals = np.reshape(self.proposals, [-1, 2])
    # Proposals reaching outside the valid frame range are masked out.
    illegal = np.logical_or(proposals[:, 0] < 0,
                            proposals[:, 1] >= self.max_num_frames)
    label1 = np.repeat(np.expand_dims(label, 0), proposals.shape[0], 0)
    IoUs = criteria.calculate_IoU_batch((proposals[:, 0], proposals[:, 1]),
                                        (label1[:, 0], label1[:, 1]))
    IoUs[illegal] = 0.0  # [video_len * num_anchors]
    max_IoU = np.max(IoUs)
    if max_IoU == 0.0:
        # No legal proposal overlaps the label — dump diagnostics and abort.
        print(illegal)
        print(label)
        print(proposals[illegal])
        # BUG FIX: the original used `proposals[1 - illegal]`, which turns the
        # boolean mask into integers 0/1 and fancy-indexes rows 0 and 1
        # instead of selecting the legal proposals. `~illegal` is the
        # boolean complement.
        print(proposals[~illegal])
        # Raise instead of exit(1) so DataLoader workers surface the error.
        raise RuntimeError('no proposal overlaps the ground-truth label')
    # Zero out weak anchors and normalize scores to [0, 1].
    IoUs[IoUs < 0.3 * max_IoU] = 0.0
    IoUs = IoUs / max_IoU
    scores = IoUs.astype(np.float32)
    scores_mask = (~illegal).astype(np.uint8)  # same values as 1 - illegal
    return video, video_mask, words, word_mask, label, \
        scores, scores_mask, \
        id2pos, node_mask, adj_mat