Example 1
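The methods below are fragments of several dataset/evaluator classes, so they are not runnable on their own. As a sketch, these are the imports the code appears to rely on; the project-local helpers (KyLoader, LoaderUtils, rep_conversions, NMS, sum_tools, helper, compute_intersection) are assumptions here and their actual module paths may differ in the original repository.

import os
import sys

import numpy as np
import progressbar
import torch
import torch.nn.functional as F
from torch.autograd import Variable

# Project-local modules (assumed names; adjust to the actual package layout):
# KyLoader, LoaderUtils, rep_conversions, NMS, sum_tools, helper, compute_intersection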
    def __init__(self, dataset_name='TVSum', split='train', seq_length=90, overlap=0.9, sample_rate=None,
                 feature_file_ext='npy', rdOffset=False, rdDrop=False, train_val_perms=None, data_path=None):
        if dataset_name.lower() not in ['summe', 'tvsum']:
            print('Unrecognized dataset {:s}'.format(dataset_name))
            sys.exit(-1)
        self.dataset_name = dataset_name
        self.feature_file_ext = feature_file_ext
        self.split = split

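        # Derive video names from the reduced C3D feature files on disk, then keep only the
        # subset indexed by train_val_perms (the permutation that defines this train/val split).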
        # self.feature_directory = os.path.join(user_root, 'datasets/%s/features/c3dd-red500' % (dataset_name))
        self.feature_directory = os.path.join(data_path, '%s/features/c3dd-red500' % (dataset_name))
        self.filenames = os.listdir(self.feature_directory)
        self.filenames = [f.split('.', 1)[0] for f in self.filenames]
        self.filenames.sort()
        n_files = len(self.filenames)
        # selected_perms = range(n_files)
        # if self.split == 'train':
        #     selected_perms = train_val_perms[:int(0.8 * n_files)]
        # elif self.split == 'val':
        #     selected_perms = train_val_perms[int(0.8 * n_files):]
        # else:
        #     print("Unrecognized split:{:s}".format(self.split))
        # self.filenames = [self.filenames[i] for i in selected_perms]
        self.filenames = [self.filenames[i] for i in train_val_perms]
        update_n_files = len(self.filenames)

        if sample_rate is None:
            self.sample_rate = [1, 2, 4]
        else:
            self.sample_rate = sample_rate
        self.seq_len = seq_length
        self.overlap = overlap
        self.rdOffset = rdOffset
        self.rdDrop = rdDrop

        print("Processing {:s}\t{:s} data".format(self.dataset_name, self.split))
        print("num_videos:{:d} clip len:{:d} sample_rate: ".format(len(self.filenames), self.seq_len) + ' '.join(
            str(self.sample_rate)))

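        # Load the packaged annotation file (user summaries, ground-truth scores, features) from
        # KY_AAAI18 and the mapping between raw video names and the keys used inside that h5 file.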
        KY_dataset_path = os.path.join(data_path, 'KY_AAAI18/datasets')
        Kydataset = KyLoader.loadKyDataset(self.dataset_name.lower(), file_path=os.path.join(KY_dataset_path, 'eccv16_dataset_{:s}_google_pool5.h5'.format(dataset_name.lower())))
        conversion = KyLoader.loadConversion(self.dataset_name.lower(), file_path=os.path.join(KY_dataset_path, '{:s}_name_conversion.pkl'.format(dataset_name.lower())))
        self.raw2Ky = conversion[0]
        self.Ky2raw = conversion[1]

        self.full_features = {}
        self.instances = []
        self.maximum_outputs = 0
        print("Creating training instances")
        pbar = progressbar.ProgressBar(max_value=len(self.filenames))
        n_positive_instances = 0
        n_positive_train_samples = 0
        n_total_train_samples = 0
        n_users = 0
        n_notselected_seq = 0

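        # For every video: average the per-user annotations into one score curve, convert it to
        # keyshot segments, then slide windows of seq_len * sample_rate frames over the video with
        # the configured overlap; every segment that intersects a window becomes one of its targets.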
        for file_idx, s_filename in enumerate(self.filenames):
            pbar.update(file_idx)
            Kykey = self.raw2Ky[s_filename]
            s_usersummaries = Kydataset[Kykey]['user_summary'][...]
            s_usersummaries = s_usersummaries.transpose()
            n_frames = s_usersummaries.shape[0]

            #Updates
            # take average annotation, get a score, use DP generate a new annotation
            s_usersummaries = np.mean(s_usersummaries, 1, keepdims=True)
            s_segments, s_segment_scores = LoaderUtils.convertscores2segs(s_usersummaries)
            s_selected_segments = rep_conversions.selecteTopSegments(s_segments, s_segment_scores, n_frames)
            s_frame01scores = rep_conversions.keyshots2frame01scores(s_selected_segments, n_frames)
            s_frame01scores = s_frame01scores.reshape([-1, 1])

            # load features
            # TODO: if use dimension reduced feature, this need to change to read numpy files
            s_features = np.load(
                os.path.join(self.feature_directory, '{:s}.{:s}'.format(s_filename, self.feature_file_ext)))
            # the size of s_features is: [length, fea_dim]
            # s_features = s_features['fc7']
            s_features_len = len(s_features)
            # the length of c3d feature is larger than annotation, choose middles to match
            assert abs(n_frames - s_features_len) < 6, 'annotation and feature length not equal! {:d}, {:d}'.format(
                n_frames, s_features_len)
            offset = abs(s_features_len - n_frames) // 2
            s_features = s_features[offset:offset + n_frames]
            self.full_features[s_filename] = s_features

            s_n_users = s_frame01scores.shape[1]
            n_users += s_n_users

            # s_segments = LoaderUtils.convertlabels2segs(s_usersummaries) # load segments, check this function...
            # TODO: starting from here, you may consider changing it according to dataloader_c3dd_aug_fast
            for s_user in range(s_n_users):
                s_segments = LoaderUtils.convertlabels2segs(
                    s_frame01scores[:, [s_user]])  # load segments, check this function...
                n_positive_instances += len(s_segments)
                for s_sample_rate in self.sample_rate:
                    s_seq_len = self.seq_len * s_sample_rate
                    # only pick sequence whose length are longer than length to be picked
                    if s_seq_len <= n_frames:
                        start_idx = 0
                        isInbound = True
                        while start_idx < n_frames and isInbound:
                            end_idx = start_idx + s_seq_len
                            # UPDATE: cannot set to >, since we want to set isInbound to False this time
                            if end_idx >= n_frames:
                                isInbound = False
                                start_idx = start_idx - (end_idx - n_frames)
                                end_idx = n_frames

                            s_instance = {}
                            s_instance['name'] = s_filename
                            s_instance['start'] = start_idx
                            s_instance['end'] = end_idx
                            s_instance['actions'] = []
                            s_instance['sample_rate'] = s_sample_rate
                            s_instance['n_frames'] = n_frames
                            # TODO: also think about here, perhaps keep the ones that overlap with the current clip over a threshold?
                            # TODO: in this way, how are we assigning them scores?
                            s_instance_window = [start_idx, end_idx]

                            for s_action in s_segments:
                                # Update: here include the partially overlaps...
                                if compute_intersection(s_action, s_instance_window) == 1:
                                    s_action_start = max(s_action[0], s_instance_window[0])
                                    s_action_end = min(s_action[1],
                                                       s_instance_window[1] - 1)  # TODO:check if here should minus 1
                                    # TODO: add overlap rate here!
                                    s_instance['actions'].append([s_action_start, s_action_end])

                            if len(s_instance['actions']) > self.maximum_outputs:
                                self.maximum_outputs = len(s_instance['actions'])
                            self.instances.append(s_instance)
                            n_positive_train_samples += len(s_instance['actions'])
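                            # advance the window start by (1 - overlap) of the window length
                            # (a 10% stride for the default overlap of 0.9)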
                            start_idx = int(start_idx + (1 - self.overlap) * s_seq_len)
                    else:
                        n_notselected_seq += 1

        n_total_train_samples = len(self.instances) * self.maximum_outputs
        self.n_total_train_samples = n_total_train_samples
        self.n_positive_train_samples = n_positive_train_samples
        print(
            "{:s}\t{:d} video clips, {:d} training instances, {:d} positive examples, max instance per segment:{:d}, total number users:{:d}, not selected sequences:{:d}, total:{:d}, total pos:{:d}".
            format(split, update_n_files, len(self.instances), n_positive_instances, self.maximum_outputs, n_users, n_notselected_seq, n_total_train_samples, n_positive_train_samples))

    def Evaluate(self, model, use_cuda=True):

        F1s = 0
        n_notselected_seq = 0
        widgets = [
            ' -- [ ',
            progressbar.Counter(), '|',
            str(self.dataset_size), ' ] ',
            progressbar.Bar(), ' name:  ',
            progressbar.FormatLabel(''), ' F1s: ',
            progressbar.FormatLabel(''), ' (',
            progressbar.ETA(), ' ) '
        ]

        pbar = progressbar.ProgressBar(max_value=self.dataset_size,
                                       widgets=widgets)
        pbar.start()

        #FIXME This process is problematic and needs update!
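        # For each video and sample rate, slide fixed-length windows over the subsampled features,
        # run the model on each window to predict segment head/tail positions and scores, and map
        # those predictions back to original frame indices before merging them across windows.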
        for video_idx, (s_name, s_feature, s_groundtruth01score) in enumerate(
                zip(self.videonames, self.videofeatures,
                    self.groundtruth01scores)):
            n_frames = s_feature.shape[0]

            pred_segments = []
            pred_scores = []
            for s_sample_rate in self.sample_rate:
                sample_rate_feature = s_feature[::s_sample_rate, :]
                sample_rate_nframes = sample_rate_feature.shape[0]

                startingBounds = 0
                if sample_rate_nframes < self.seq_len:
                    n_notselected_seq += 1
                else:
                    isInbound = True
                    proposedSegments = []
                    while startingBounds < sample_rate_nframes and isInbound:
                        endingBounds = startingBounds + self.seq_len
                        if endingBounds >= sample_rate_nframes:
                            isInbound = False
                            endingBounds = sample_rate_nframes
                            startingBounds = endingBounds - self.seq_len
                        proposedSegments.append([startingBounds, endingBounds])
                        startingBounds += int(
                            (1 - self.overlap) * self.seq_len)

                    # TODO Here could also be of change: record the clips and dynamic programming based on non-overlap segments and scores...
                    for s_proposed_segment in proposedSegments:
                        startIdx = s_proposed_segment[0]
                        endIdx = s_proposed_segment[1]
                        assert endIdx - startIdx == self.seq_len, "distance between startIdx and endIdx should be seq_len:{:d},{:d},{:d}".format(
                            endIdx, startIdx, self.seq_len)
                        s_clip_feature = Variable(torch.FloatTensor(
                            sample_rate_feature[startIdx:endIdx, :]),
                                                  requires_grad=False)
                        if use_cuda:
                            s_clip_feature = s_clip_feature.cuda()

                        s_clip_feature = s_clip_feature.permute(1,
                                                                0).unsqueeze(0)

                        _, head_positions, _, tail_positions, cls_scores, _ = model(
                            s_clip_feature)
                        head_positions, tail_positions = helper.switch_positions(
                            head_positions, tail_positions)
                        # correct ones:
                        head_positions = (head_positions +
                                          startIdx) * s_sample_rate
                        tail_positions = (tail_positions +
                                          startIdx) * s_sample_rate

                        head_positions = head_positions.squeeze(0)
                        tail_positions = tail_positions.squeeze(0)
                        cls_scores = cls_scores.squeeze(0)

                        pred_positions = torch.stack(
                            [head_positions, tail_positions], dim=-1)
                        # cls_scores = F.softmax(cls_scores, dim=-1)[:, -1]
                        cls_scores = F.hardtanh(
                            cls_scores, min_val=0,
                            max_val=1).contiguous().view(-1)

                        pred_segments.append(pred_positions.data.cpu().numpy())
                        pred_scores.append(cls_scores.data.cpu().numpy())

            #FIXME: debug here!
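            # Merge the window-level predictions from all sample rates, suppress near-duplicates
            # with NMS, select the top-scoring keyshots, and convert them to per-frame 0/1 labels
            # for F1 evaluation against the ground-truth summary.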
            pred_segments = np.concatenate(pred_segments)
            pred_scores = np.concatenate(pred_scores)
            updated_segments, updated_scores, picks = NMS.non_maxima_supression(
                pred_segments, pred_scores)
            selected_segments = rep_conversions.selecteTopSegments(
                updated_segments, updated_scores, n_frames)
            pred_framescores = rep_conversions.keyshots2frame01scores(
                selected_segments, n_frames)

            s_F1, _, _ = sum_tools.evaluate_summary(
                pred_framescores, s_groundtruth01score.reshape([1, -1]),
                self.eval_metrics)

            F1s += s_F1
            widgets[-6] = progressbar.FormatLabel('{:s}'.format(s_name))
            widgets[-4] = progressbar.FormatLabel('{:.4f}'.format(s_F1))
            pbar.update(video_idx)

        if n_notselected_seq > 0:
            print("not selected sequence:{:d}".format(n_notselected_seq))

        return F1s / self.dataset_size

    def __init__(self,
                 dataset_name='TVSum',
                 split='train',
                 seq_length=90,
                 overlap=0.9,
                 sample_rate=None,
                 feature_file_ext='npy',
                 sum_budget=0.15,
                 train_val_perms=None,
                 eval_metrics='avg',
                 data_path=None):

        if dataset_name.lower() not in ['summe', 'tvsum']:
            print('Unrecognized dataset {:s}'.format(dataset_name))
        self.dataset_name = dataset_name
        self.eval_metrics = eval_metrics  #[self.dataset_name.lower()]
        self.split = split
        self.sum_budget = sum_budget
        self.feature_file_ext = feature_file_ext

        self.feature_directory = os.path.join(
            data_path, '%s/features/c3dd-red500' % (dataset_name))
        self.filenames = os.listdir(self.feature_directory)
        self.filenames = [f.split('.', 1)[0] for f in self.filenames]
        self.filenames.sort()
        n_files = len(self.filenames)
        self.filenames = [self.filenames[i] for i in train_val_perms]

        if sample_rate is None:
            self.sample_rate = [1, 2, 4]
        else:
            self.sample_rate = sample_rate
        self.seq_len = seq_length
        self.overlap = overlap

        self.videofeatures = []
        self.groundtruthscores = []
        self.groundtruth01scores = []
        # self.segments = []
        self.videonames = []
        KY_dataset_path = os.path.join(data_path, 'KY_AAAI18/datasets')
        Kydataset = KyLoader.loadKyDataset(
            self.dataset_name.lower(),
            file_path=os.path.join(
                KY_dataset_path, 'eccv16_dataset_{:s}_google_pool5.h5'.format(
                    dataset_name.lower())))
        conversion = KyLoader.loadConversion(
            self.dataset_name.lower(),
            file_path=os.path.join(
                KY_dataset_path,
                '{:s}_name_conversion.pkl'.format(dataset_name.lower())))
        self.raw2Ky = conversion[0]
        self.Ky2raw = conversion[1]

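        # Cache, for every video, its features and a 0/1 keyshot ground truth derived from the
        # packaged 'gtscore' curve (scores -> segments -> top segments -> 0/1 frame labels).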
        for s_video_idx, s_filename in enumerate(self.filenames):
            KyKey = self.raw2Ky[s_filename]

            s_scores = Kydataset[KyKey]['gtscore'][...]
            s_scores = s_scores.reshape([-1, 1])

            n_frames = s_scores.shape[0]

            s_segments, s_segment_scores = LoaderUtils.convertscores2segs(
                s_scores)
            selected_segments = rep_conversions.selecteTopSegments(
                s_segments, s_segment_scores, n_frames)
            s_frame01scores = rep_conversions.keyshots2frame01scores(
                selected_segments, n_frames)
            # s_frame01scores = rep_conversions.framescore2frame01score_inteval(s_scores.reshape([-1]), s_segments, lratio=self.sum_budget)

            # the size of s_features is: [length, fea_dim]
            # s_video_features = np.load(
            #     os.path.join(self.feature_directory, '{:s}.{:s}'.format(s_filename, self.feature_file_ext)))
            s_video_features = Kydataset[KyKey]['features']
            s_features_len = len(s_video_features)
            # the length of c3d feature is larger than annotation, choose middles to match
            assert abs(
                n_frames - s_features_len
            ) < 6, 'annotation and feature length not equal! {:d}, {:d}'.format(
                n_frames, s_features_len)
            offset = abs(s_features_len - n_frames) // 2
            s_video_features = s_video_features[offset:offset + n_frames]

            self.groundtruthscores.append(s_scores)
            self.groundtruth01scores.append(s_frame01scores)
            self.videofeatures.append(s_video_features)
            # self.segments.append(s_segments)
            self.videonames.append(s_filename)
        self.dataset_size = len(self.videofeatures)
        print("{:s}\tEvaluator: {:s}\t{:d} Videos".format(
            self.dataset_name, self.split, self.dataset_size))

    def __init__(self, dataset_name='TVSum', split='train', seq_length=90, overlap=0.9, sample_rate=None,
                 feature_file_ext='npy', sum_budget=0.15, train_val_perms=None, eval_metrics='max', data_path=None):

        if dataset_name.lower() not in ['summe', 'tvsum']:
            print('Unrecognized dataset {:s}'.format(dataset_name))
        self.dataset_name = dataset_name
        self.eval_metrics = eval_metrics  # [self.dataset_name.lower()]
        self.split = split
        self.sum_budget = sum_budget
        self.feature_file_ext = feature_file_ext
        
        # self.feature_directory = os.path.join(user_root, 'datasets/%s/features/c3dd-red500' % (dataset_name))
        self.feature_directory = os.path.join(data_path, '%s/features/c3dd-red500' % (dataset_name))
        self.filenames = os.listdir(self.feature_directory)
        self.filenames = [f.split('.', 1)[0] for f in self.filenames]
        self.filenames.sort()
        n_files = len(self.filenames)
        # selected_perms = range(n_files)
        # if self.split == 'train':
        #     selected_perms = train_val_perms[:int(0.8 * n_files)]
        # elif self.split == 'val':
        #     selected_perms = train_val_perms[int(0.8 * n_files):]
        # else:
        #     print("Unrecognized split:{:s}".format(self.split))
        
        # self.filenames = [self.filenames[i] for i in selected_perms]
        self.filenames = [self.filenames[i] for i in train_val_perms]

        if sample_rate is None:
            self.sample_rate = [1, 2, 4]
        else:
            self.sample_rate = sample_rate
        self.seq_len = seq_length
        self.overlap = overlap

        self.videofeatures = []
        self.groundtruthscores = []
        self.combinegroundtruth01scores = []
        # self.segments = []
        self.videonames = []
        KY_dataset_path = os.path.join(data_path, 'KY_AAAI18/datasets')
        Kydataset = KyLoader.loadKyDataset(self.dataset_name.lower(), file_path=os.path.join(KY_dataset_path, 'eccv16_dataset_{:s}_google_pool5.h5'.format(dataset_name.lower())))
        conversion = KyLoader.loadConversion(self.dataset_name.lower(), file_path=os.path.join(KY_dataset_path, '{:s}_name_conversion.pkl'.format(dataset_name.lower())))
        self.raw2Ky = conversion[0]
        self.Ky2raw = conversion[1]

        # project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
        # raw_data_path = os.path.join(project_root, 'Devs_vsSum/datasets/TVSum/TVSumRaw.pkl')
        # raw_annotation_data = pkl.load(open(raw_data_path, 'rb'))

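        # Cache, for every video, the per-user summaries (used as ground truth for max/avg F1)
        # together with a combined 0/1 ground truth obtained by averaging the user summaries,
        # converting the averaged curve to segments, and selecting the top segments.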
        for s_video_idx, s_filename in enumerate(self.filenames):
            KyKey = self.raw2Ky[s_filename]

            s_scores = Kydataset[KyKey]['user_summary'][...]
            s_scores = s_scores.transpose()
            n_frames = len(s_scores)
            # s_segments = LoaderUtils.convertlabels2segs(s_scores)

            # raw_user_summaris = raw_annotation_data[s_filename]
            # raw_user_summaris_01 = []
            # for s_raw_user_summary in raw_user_summaris:
            #     assert len(s_raw_user_summary) == n_frames

            #     s_raw_user_summary = np.expand_dims(np.array(s_raw_user_summary), -1)
            #     s_summary_segments, s_summary_scores = LoaderUtils.convertscores2segs(s_raw_user_summary)
            #     s_selected_segments = rep_conversions.selecteTopSegments(s_summary_segments, s_summary_scores, n_frames)
            #     # raw_user_summaris_01.append(s_segments)
            #     s_frame01scores = rep_conversions.keyshots2frame01scores(s_selected_segments, n_frames)
            #     raw_user_summaris_01.append(s_frame01scores)
            # raw_user_summaris_01 = np.stack(raw_user_summaris_01, axis=1)


            # raw_user_summaris = np.array(raw_user_summaris)
            # raw_user_summaris = raw_user_summaris.transpose()
            ky_combine_summaris = np.mean(s_scores, 1, keepdims=True)
            s_combine_segments, s_combine_segment_scores = LoaderUtils.convertscores2segs(ky_combine_summaris)
            s_combine_selected_segments = rep_conversions.selecteTopSegments(s_combine_segments, s_combine_segment_scores, n_frames)
            s_combine_frame01scores = rep_conversions.keyshots2frame01scores(s_combine_selected_segments, n_frames)


            # the size of s_features is: [length, fea_dim]
            s_video_features = np.load(
                os.path.join(self.feature_directory, '{:s}.{:s}'.format(s_filename, self.feature_file_ext)))
            s_features_len = len(s_video_features)
            # the length of c3d feature is larger than annotation, choose middles to match
            assert abs(n_frames - s_features_len) < 6, 'annotation and feature length not equal! {:d}, {:d}'.format(
                n_frames, s_features_len)
            offset = abs(s_features_len - n_frames) / 2
            s_video_features = s_video_features[offset:offset + n_frames]

            self.groundtruthscores.append(s_scores)
            self.videofeatures.append(s_video_features)
            self.combinegroundtruth01scores.append(s_combine_frame01scores)
            # self.segments.append(s_segments)
            self.videonames.append(s_filename)
        self.dataset_size = len(self.videofeatures)
        print("{:s}\tEvaluator: {:s}\t{:d} Videos".format(self.dataset_name, self.split, self.dataset_size))