Example no. 1 (score: 0)
    def eval_save(self):
        """Evaluate the model on the test set and dump per-sample results.

        Runs one pass over ``self.test_loader`` with gradients disabled and:
          * writes one "IoU / start / ends / gt" line per sample to ./our.txt
          * saves the model's two attention outputs to a1.npy / a2.npy
            (overwritten every batch, so only the last batch survives)
          * prints averaged loss / mIoU / IoU@0.k meters to stdout
        """
        data_loaders = [self.test_loader]
        meters = collections.defaultdict(lambda: AverageMeter())
        time_meter = TimeMeter()
        self.model.eval()
        # Bug fix: the original opened './our.txt' without ever closing it,
        # leaking the handle and risking unflushed buffered writes.
        with open('./our.txt', 'w') as f:
            with torch.no_grad():
                for data_loader in data_loaders:
                    for bid, (video, video_mask, words, word_mask,
                              label, scores, scores_mask, id2pos, node_mask, adj_mat) in enumerate(data_loader, 1):
                        # NOTE(review): zero_grad() is effectively a no-op under
                        # no_grad(); kept for parity with the training loop.
                        self.optimizer.zero_grad()

                        model_input = {
                            'frames': video.cuda(),
                            'frame_mask': video_mask.cuda(), 'words': words.cuda(), 'word_mask': word_mask.cuda(),
                            'label': scores.cuda(), 'label_mask': scores_mask.cuda(), 'gt': label.cuda(),
                            'node_pos': id2pos.cuda(), 'node_mask': node_mask.cuda(), 'adj_mat': adj_mat.cuda()
                        }

                        predict_boxes, loss, _, a1, a2 = self.model(**model_input)
                        loss = torch.mean(loss)
                        time_meter.update()
                        if bid % self.args.display_n_batches == 0:
                            logging.info('%.3f seconds/batch' % (
                                1.0 / time_meter.avg
                            ))
                        meters['loss'].update(loss.item())
                        a1, a2 = a1.cpu().numpy(), a2.cpu().numpy()
                        np.save('a1.npy', a1)
                        np.save('a2.npy', a2)
                        video_mask = video_mask.cpu().numpy()
                        gt_boxes = model_input['gt'].cpu().numpy()
                        predict_boxes = np.round(predict_boxes.cpu().numpy()).astype(np.int32)
                        gt_starts, gt_ends = gt_boxes[:, 0], gt_boxes[:, 1]
                        predict_starts, predict_ends = predict_boxes[:, 0], predict_boxes[:, 1]
                        # Clamp predictions into the valid frame range.
                        predict_starts[predict_starts < 0] = 0
                        seq_len = np.sum(video_mask, -1)
                        predict_ends[predict_ends >= seq_len] = seq_len[predict_ends >= seq_len] - 1
                        IoUs = criteria.calculate_IoU_batch((predict_starts, predict_ends),
                                                            (gt_starts, gt_ends))
                        for kk in range(predict_starts.shape[0]):
                            f.write('IoU: '+str(IoUs[kk])+' start: '+str(predict_starts[kk])+' ends: '+str(predict_ends[kk])+' gt: '+str(gt_starts[kk])+' '+str(gt_ends[kk])+'\n')
                        meters['mIoU'].update(np.mean(IoUs), IoUs.shape[0])
                        for i in range(1, 10, 2):
                            meters['IoU@0.%d' % i].update(np.mean(IoUs >= (i / 10)), IoUs.shape[0])
                    # Bug fix: the original labelled list index 0 as 'val', but
                    # data_loaders holds only the test loader here, so every
                    # result was printed under the wrong split name.
                    if data_loader is self.test_loader:
                        print('--------test')
                    else:
                        print('--------val')
                    print('| ', end='')
                    for key, value in meters.items():
                        print('{}, {:.4f}'.format(key, value.avg), end=' | ')
                        meters[key].reset()
                    print()
Example no. 2 (score: 0)
    def eval(self):
        """Run one validation and one test pass, printing averaged metrics.

        For each loader the loop computes the mean loss, clamps the predicted
        [start, end] boxes to the valid frame range, and accumulates mIoU and
        IoU@0.k (k in 1,3,5,7,9) meters, which are printed and reset per loader.
        """
        loaders = [self.val_loader, self.test_loader]
        meters = collections.defaultdict(lambda: AverageMeter())

        self.model.eval()
        with torch.no_grad():
            for loader in loaders:
                for batch in loader:
                    (video, video_mask, words, word_mask,
                     label, scores, scores_mask, id2pos, node_mask, adj_mat) = batch
                    self.optimizer.zero_grad()

                    model_input = {
                        'frames': video.cuda(),
                        'frame_mask': video_mask.cuda(),
                        'words': words.cuda(),
                        'word_mask': word_mask.cuda(),
                        'label': scores.cuda(),
                        'label_mask': scores_mask.cuda(),
                        'gt': label.cuda(),
                        'node_pos': id2pos.cuda(),
                        'node_mask': node_mask.cuda(),
                        'adj_mat': adj_mat.cuda(),
                    }

                    predict_boxes, loss, _, _, _ = self.model(**model_input)
                    meters['loss'].update(torch.mean(loss).item())

                    mask_np = video_mask.cpu().numpy()
                    gt_boxes = model_input['gt'].cpu().numpy()
                    pred_boxes = np.round(predict_boxes.cpu().numpy()).astype(np.int32)
                    gt_starts, gt_ends = gt_boxes[:, 0], gt_boxes[:, 1]
                    pred_starts, pred_ends = pred_boxes[:, 0], pred_boxes[:, 1]
                    # Clamp predictions into the valid frame range.
                    pred_starts[pred_starts < 0] = 0
                    seq_len = np.sum(mask_np, -1)
                    overflow = pred_ends >= seq_len
                    pred_ends[overflow] = seq_len[overflow] - 1
                    IoUs = criteria.calculate_IoU_batch(
                        (pred_starts, pred_ends), (gt_starts, gt_ends))
                    meters['mIoU'].update(np.mean(IoUs), IoUs.shape[0])
                    for k in range(1, 10, 2):
                        meters['IoU@0.%d' % k].update(
                            np.mean(IoUs >= (k / 10)), IoUs.shape[0])

                print('| ', end='')
                for name, meter in meters.items():
                    print('{}, {:.4f}'.format(name, meter.avg), end=' | ')
                    meters[name].reset()
                print()
    def __getitem__(self, index):
        """Return one padded/sampled example for the grounding model.

        Starting from the parent dataset's raw (video, words, label, id2pos,
        adj_mat) tuple, this:
          * pads/truncates words to ``self.max_num_words`` with a uint8 mask
          * temporally pools the video down to ``self.max_num_frames`` frames
          * remaps the [start, end] frame label onto the sampled timeline
          * drops/pads parse-graph nodes to ``self.max_num_nodes``
          * scores every anchor proposal by IoU with the label, normalised by
            the best proposal and zeroed below 30% of it

        Returns:
            (video, video_mask, words, word_mask, label,
             scores, scores_mask, id2pos, node_mask, adj_mat)
        """
        video, words, label, id2pos, adj_mat = super().__getitem__(index)

        ori_words_len = words.shape[0]
        # word padding
        if ori_words_len < self.max_num_words:
            word_mask = np.zeros([self.max_num_words], np.uint8)
            word_mask[range(ori_words_len)] = 1
            words = np.pad(words,
                           ((0, self.max_num_words - ori_words_len), (0, 0)),
                           mode='constant')
        else:
            word_mask = np.ones([self.max_num_words], np.uint8)
            words = words[:self.max_num_words]

        # video sampling: average each [index[i], index[i+1]) segment so the
        # clip becomes exactly max_num_frames long
        ori_video_len = video.shape[0]
        video_mask = np.ones([self.max_num_frames], np.uint8)
        index = np.linspace(start=0,
                            stop=ori_video_len - 1,
                            num=self.max_num_frames).astype(np.int32)
        new_video = []
        for i in range(len(index) - 1):
            start = index[i]
            end = index[i + 1]
            if start == end or start + 1 == end:
                new_video.append(video[start])
            else:
                new_video.append(np.mean(video[start:end], 0))
        new_video.append(video[-1])
        video = np.stack(new_video, 0)

        # label recomputing onto the sampled timeline
        label[0] = min(np.where(index >= label[0])[0])
        if label[1] == ori_video_len - 1:
            label[1] = self.max_num_frames - 1
        else:
            label[1] = max(np.where(index <= label[1])[0])
        if label[1] < label[0]:
            label[0] = label[1]

        assert len(id2pos) == adj_mat.shape[0] == ori_words_len

        # some words have been cut out: keep only graph nodes whose word
        # survived the truncation above
        true_index = id2pos < self.max_num_words
        id2pos = id2pos[true_index]
        adj_mat = adj_mat[true_index]
        adj_mat = adj_mat[:, true_index]

        # node padding
        if id2pos.shape[0] < self.max_num_nodes:
            node_mask = np.zeros([self.max_num_nodes], np.uint8)
            node_mask[range(id2pos.shape[0])] = 1
            id2pos = np.pad(id2pos, (0, self.max_num_nodes - id2pos.shape[0]),
                            mode='constant')
            adj_mat = np.pad(adj_mat,
                             ((0, self.max_num_nodes - adj_mat.shape[0]),
                              (0, self.max_num_nodes - adj_mat.shape[1])),
                             mode='constant')
        else:
            node_mask = np.ones([self.max_num_nodes], np.uint8)
            id2pos = id2pos[:self.max_num_nodes]
            adj_mat = adj_mat[:self.max_num_nodes, :self.max_num_nodes]

        # scores computing
        proposals = np.reshape(self.proposals, [-1, 2])
        illegal = np.logical_or(proposals[:, 0] < 0,
                                proposals[:, 1] >= self.max_num_frames)
        label1 = np.repeat(np.expand_dims(label, 0), proposals.shape[0], 0)
        IoUs = criteria.calculate_IoU_batch((proposals[:, 0], proposals[:, 1]),
                                            (label1[:, 0], label1[:, 1]))
        IoUs[illegal] = 0.0  # [video_len * num_anchors]
        max_IoU = np.max(IoUs)
        if max_IoU == 0.0:
            # Debug dump before aborting: no legal proposal overlaps the label.
            print(illegal)
            print(label)
            print(proposals[illegal])
            # Bug fix: `proposals[1 - illegal]` subtracted from a boolean
            # array, which raises TypeError on NumPy >= 1.13 and performed
            # integer (not boolean) indexing on older versions; `~illegal`
            # is the intended boolean-mask negation.
            print(proposals[~illegal])
            exit(1)
        IoUs[IoUs < 0.3 * max_IoU] = 0.0
        IoUs = IoUs / max_IoU
        scores = IoUs.astype(np.float32)
        # Same boolean-subtraction fix as above: ~illegal marks legal anchors.
        scores_mask = (~illegal).astype(np.uint8)
        return video, video_mask, words, word_mask, label, \
               scores, scores_mask, \
               id2pos, node_mask, adj_mat