def __init__(self, dataset_name='TVSum', split='train', seq_length=90, overlap=0.9, sample_rate=None,
             feature_file_ext='npy', rdOffset=False, rdDrop=False, train_val_perms=None, data_path=None):
    if dataset_name.lower() not in ['summe', 'tvsum']:
        print('Unrecognized dataset {:s}'.format(dataset_name))
        sys.exit(-1)
    self.dataset_name = dataset_name
    self.feature_file_ext = feature_file_ext
    self.split = split
    self.feature_directory = os.path.join(data_path, '%s/features/c3dd-red500' % (dataset_name))
    self.filenames = os.listdir(self.feature_directory)
    self.filenames = [f.split('.', 1)[0] for f in self.filenames]
    self.filenames.sort()
    n_files = len(self.filenames)
    # The 80/20 train/val split is computed by the caller and passed in via train_val_perms.
    self.filenames = [self.filenames[i] for i in train_val_perms]
    update_n_files = len(self.filenames)
    if sample_rate is None:
        self.sample_rate = [1, 2, 4]
    else:
        self.sample_rate = sample_rate
    self.seq_len = seq_length
    self.overlap = overlap
    self.rdOffset = rdOffset
    self.rdDrop = rdDrop
    print("Processing {:s}\t{:s} data".format(self.dataset_name, self.split))
    print("num_videos:{:d} clip len:{:d} sample_rate: ".format(len(self.filenames), self.seq_len)
          + ' '.join(map(str, self.sample_rate)))  # join the rates, not the characters of str(list)
    KY_dataset_path = os.path.join(data_path, 'KY_AAAI18/datasets')
    Kydataset = KyLoader.loadKyDataset(
        self.dataset_name.lower(),
        file_path=os.path.join(KY_dataset_path,
                               'eccv16_dataset_{:s}_google_pool5.h5'.format(dataset_name.lower())))
    conversion = KyLoader.loadConversion(
        self.dataset_name.lower(),
        file_path=os.path.join(KY_dataset_path,
                               '{:s}_name_conversion.pkl'.format(dataset_name.lower())))
    self.raw2Ky = conversion[0]
    self.Ky2raw = conversion[1]
    self.full_features = {}
    self.instances = []
    self.maximum_outputs = 0
    print("Creating training instances")
    pbar = progressbar.ProgressBar(max_value=len(self.filenames))
    n_positive_instances = 0
    n_positive_train_samples = 0
    n_total_train_samples = 0
    n_users = 0
    n_notselected_seq = 0
    for file_dix, s_filename in enumerate(self.filenames):
        pbar.update(file_dix)
        Kykey = self.raw2Ky[s_filename]
        s_usersummaries = Kydataset[Kykey]['user_summary'][...]
        s_usersummaries = s_usersummaries.transpose()
        n_frames = s_usersummaries.shape[0]
        # Average the per-user annotations into one score curve, then use DP to
        # generate a single binary annotation from it.
        s_usersummaries = np.mean(s_usersummaries, 1, keepdims=True)
        s_segments, s_segment_scores = LoaderUtils.convertscores2segs(s_usersummaries)
        s_selected_segments = rep_conversions.selecteTopSegments(s_segments, s_segment_scores, n_frames)
        s_frame01scores = rep_conversions.keyshots2frame01scores(s_selected_segments, n_frames)
        s_frame01scores = s_frame01scores.reshape([-1, 1])
        # Load features; the size of s_features is [length, fea_dim].
        s_features = np.load(
            os.path.join(self.feature_directory, '{:s}.{:s}'.format(s_filename, self.feature_file_ext)))
        s_features_len = len(s_features)
        # The C3D feature sequence can be slightly longer than the annotation;
        # crop the middle so the two lengths match.
        assert abs(n_frames - s_features_len) < 6, \
            'annotation and feature length not equal! {:d}, {:d}'.format(n_frames, s_features_len)
        offset = abs(s_features_len - n_frames) // 2  # integer division: a float offset breaks slicing
        s_features = s_features[offset:offset + n_frames]
        self.full_features[s_filename] = s_features
        s_n_users = s_frame01scores.shape[1]
        n_users += s_n_users
        # TODO: starting from here, consider changing this to follow dataloader_c3dd_aug_fast.
        for s_user in range(s_n_users):
            # Convert this user's binary labels into [start, end] segments.
            s_segments = LoaderUtils.convertlabels2segs(s_frame01scores[:, [s_user]])
            n_positive_instances += len(s_segments)
            for s_sample_rate in self.sample_rate:
                s_seq_len = self.seq_len * s_sample_rate
                # Only pick windows from videos at least as long as the window itself.
                if s_seq_len <= n_frames:
                    start_idx = 0
                    isInbound = True
                    while start_idx < n_frames and isInbound:
                        end_idx = start_idx + s_seq_len
                        # Must be >=, not >: we want isInbound to flip to False on this pass.
                        if end_idx >= n_frames:
                            isInbound = False
                            start_idx = start_idx - (end_idx - n_frames)
                            end_idx = n_frames
                        s_instance = {}
                        s_instance['name'] = s_filename
                        s_instance['start'] = start_idx
                        s_instance['end'] = end_idx
                        s_instance['actions'] = []
                        s_instance['sample_rate'] = s_sample_rate
                        s_instance['n_frames'] = n_frames
                        # TODO: perhaps keep only segments whose overlap with the current
                        # clip exceeds a threshold; if so, how should they be scored?
                        s_instance_window = [start_idx, end_idx]
                        for s_action in s_segments:
                            # Include segments that only partially overlap the window.
                            if compute_intersection(s_action, s_instance_window) == 1:
                                s_action_start = max(s_action[0], s_instance_window[0])
                                s_action_end = min(s_action[1], s_instance_window[1] - 1)  # TODO: check the minus 1
                                # TODO: record the overlap rate here!
                                s_instance['actions'].append([s_action_start, s_action_end])
                        if len(s_instance['actions']) > self.maximum_outputs:
                            self.maximum_outputs = len(s_instance['actions'])
                        self.instances.append(s_instance)
                        n_positive_train_samples += len(s_instance['actions'])
                        start_idx = int(start_idx + (1 - self.overlap) * s_seq_len)
                else:
                    n_notselected_seq += 1
    n_total_train_samples = len(self.instances) * self.maximum_outputs
    self.n_total_train_samples = n_total_train_samples
    self.n_positive_train_samples = n_positive_train_samples
    print("{:s}\t{:d} video clips, {:d} training instances, {:d} positive examples, "
          "max instances per segment:{:d}, total number of users:{:d}, "
          "not selected sequences:{:d}, total:{:d}, total pos:{:d}".format(
              split, update_n_files, len(self.instances), n_positive_instances,
              self.maximum_outputs, n_users, n_notselected_seq,
              n_total_train_samples, n_positive_train_samples))
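# --- Hedged sketch: compute_intersection --------------------------------------
# compute_intersection() is called above but defined elsewhere in the repo.
# Judging from the call site, it returns 1 when two [start, end] windows
# overlap and 0 otherwise. The stand-in below is an assumption about that
# contract, not the repo's actual implementation.
def _compute_intersection_sketch(seg_a, seg_b):
    # Two windows overlap iff the later of the two starts precedes the
    # earlier of the two ends.
    return int(max(seg_a[0], seg_b[0]) < min(seg_a[1], seg_b[1]))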
def Evaluate(self, model, use_cuda=True):
    F1s = 0
    n_notselected_seq = 0
    widgets = [
        ' -- [ ', progressbar.Counter(), '|', str(self.dataset_size), ' ] ',
        progressbar.Bar(),
        ' name: ', progressbar.FormatLabel(''),
        ' F1s: ', progressbar.FormatLabel(''),
        ' (', progressbar.ETA(), ' ) '
    ]
    pbar = progressbar.ProgressBar(max_value=self.dataset_size, widgets=widgets)
    pbar.start()
    # FIXME: this process is problematic and needs an update!
    for video_idx, (s_name, s_feature, s_groundtruth01score) in enumerate(
            zip(self.videonames, self.videofeatures, self.groundtruth01scores)):
        n_frames = s_feature.shape[0]
        pred_segments = []
        pred_scores = []
        for s_sample_rate in self.sample_rate:
            sample_rate_feature = s_feature[::s_sample_rate, :]
            sample_rate_nframes = sample_rate_feature.shape[0]
            startingBounds = 0
            if sample_rate_nframes < self.seq_len:
                n_notselected_seq += 1
            else:
                isInbound = True
                proposedSegments = []
                while startingBounds < sample_rate_nframes and isInbound:
                    endingBounds = startingBounds + self.seq_len
                    if endingBounds >= sample_rate_nframes:
                        isInbound = False
                        endingBounds = sample_rate_nframes
                        startingBounds = endingBounds - self.seq_len
                    proposedSegments.append([startingBounds, endingBounds])
                    startingBounds += int((1 - self.overlap) * self.seq_len)
                # TODO: this could also change: record the clips and run dynamic
                # programming over the non-overlapping segments and their scores.
                for s_proposed_segment in proposedSegments:
                    startIdx = s_proposed_segment[0]
                    endIdx = s_proposed_segment[1]
                    assert endIdx - startIdx == self.seq_len, \
                        "distance between startIdx and endIdx should be seq_len:{:d},{:d},{:d}".format(
                            endIdx, startIdx, self.seq_len)
                    s_clip_feature = Variable(
                        torch.FloatTensor(sample_rate_feature[startIdx:endIdx, :]),
                        requires_grad=False)
                    if use_cuda:
                        s_clip_feature = s_clip_feature.cuda()
                    s_clip_feature = s_clip_feature.permute(1, 0).unsqueeze(0)
                    _, head_positions, _, tail_positions, cls_scores, _ = model(s_clip_feature)
                    head_positions, tail_positions = helper.switch_positions(head_positions, tail_positions)
                    # Map clip-relative positions back to original-frame coordinates.
                    head_positions = (head_positions + startIdx) * s_sample_rate
                    tail_positions = (tail_positions + startIdx) * s_sample_rate
                    head_positions = head_positions.squeeze(0)
                    tail_positions = tail_positions.squeeze(0)
                    cls_scores = cls_scores.squeeze(0)
                    pred_positions = torch.stack([head_positions, tail_positions], dim=-1)
                    # cls_scores = F.softmax(cls_scores, dim=-1)[:, -1]
                    cls_scores = F.hardtanh(cls_scores, min_val=0, max_val=1).contiguous().view(-1)
                    pred_segments.append(pred_positions.data.cpu().numpy())
                    pred_scores.append(cls_scores.data.cpu().numpy())
        # FIXME: debug here!
        pred_segments = np.concatenate(pred_segments)
        pred_scores = np.concatenate(pred_scores)
        updated_segments, updated_scores, picks = NMS.non_maxima_supression(pred_segments, pred_scores)
        selected_segments = rep_conversions.selecteTopSegments(updated_segments, updated_scores, n_frames)
        pred_framescores = rep_conversions.keyshots2frame01scores(selected_segments, n_frames)
        s_F1, _, _ = sum_tools.evaluate_summary(
            pred_framescores, s_groundtruth01score.reshape([1, -1]), self.eval_metrics)
        F1s += s_F1
        widgets[-6] = progressbar.FormatLabel('{:s}'.format(s_name))
        widgets[-4] = progressbar.FormatLabel('{:.4f}'.format(s_F1))
        pbar.update(video_idx)
    if n_notselected_seq > 0:
        print("not selected sequence:{:d}".format(n_notselected_seq))
    return F1s / self.dataset_size
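# --- Hedged usage sketch: driving Evaluate() -----------------------------------
# A minimal sketch of how Evaluate() is presumably called from a training loop.
# `evaluator` is an instance of the evaluator class above; `model` is assumed to
# be any module whose forward returns the 6-tuple unpacked inside Evaluate().
def _run_validation_sketch(evaluator, model, use_cuda=True):
    model.eval()  # disable dropout / batch-norm updates during evaluation
    with torch.no_grad():  # Evaluate() already builds Variables with requires_grad=False
        mean_F1 = evaluator.Evaluate(model, use_cuda=use_cuda)
    print('validation mean F1: {:.4f}'.format(mean_F1))
    return mean_F1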
def __init__(self, dataset_name='TVSum', split='train', seq_length=90, overlap=0.9, sample_rate=None,
             feature_file_ext='npy', sum_budget=0.15, train_val_perms=None, eval_metrics='avg',
             data_path=None):
    if dataset_name.lower() not in ['summe', 'tvsum']:
        print('Unrecognized dataset {:s}'.format(dataset_name))
        sys.exit(-1)  # exit on bad input, matching the training loader above
    self.dataset_name = dataset_name
    self.eval_metrics = eval_metrics
    self.split = split
    self.sum_budget = sum_budget
    self.feature_file_ext = feature_file_ext
    self.feature_directory = os.path.join(data_path, '%s/features/c3dd-red500' % (dataset_name))
    self.filenames = os.listdir(self.feature_directory)
    self.filenames = [f.split('.', 1)[0] for f in self.filenames]
    self.filenames.sort()
    n_files = len(self.filenames)
    self.filenames = [self.filenames[i] for i in train_val_perms]
    if sample_rate is None:
        self.sample_rate = [1, 2, 4]
    else:
        self.sample_rate = sample_rate
    self.seq_len = seq_length
    self.overlap = overlap
    self.videofeatures = []
    self.groundtruthscores = []
    self.groundtruth01scores = []
    self.videonames = []
    KY_dataset_path = os.path.join(data_path, 'KY_AAAI18/datasets')
    Kydataset = KyLoader.loadKyDataset(
        self.dataset_name.lower(),
        file_path=os.path.join(KY_dataset_path,
                               'eccv16_dataset_{:s}_google_pool5.h5'.format(dataset_name.lower())))
    conversion = KyLoader.loadConversion(
        self.dataset_name.lower(),
        file_path=os.path.join(KY_dataset_path,
                               '{:s}_name_conversion.pkl'.format(dataset_name.lower())))
    self.raw2Ky = conversion[0]
    self.Ky2raw = conversion[1]
    for s_video_idx, s_filename in enumerate(self.filenames):
        KyKey = self.raw2Ky[s_filename]
        s_scores = Kydataset[KyKey]['gtscore'][...]
        s_scores = s_scores.reshape([-1, 1])
        n_frames = s_scores.shape[0]
        s_segments, s_segment_scores = LoaderUtils.convertscores2segs(s_scores)
        selected_segments = rep_conversions.selecteTopSegments(s_segments, s_segment_scores, n_frames)
        s_frame01scores = rep_conversions.keyshots2frame01scores(selected_segments, n_frames)
        # Features come straight from the Ky dataset here (not the reduced C3D
        # .npy files); the size of s_video_features is [length, fea_dim].
        s_video_features = Kydataset[KyKey]['features']
        s_features_len = len(s_video_features)
        # The feature sequence can be slightly longer than the annotation;
        # crop the middle so the two lengths match.
        assert abs(n_frames - s_features_len) < 6, \
            'annotation and feature length not equal! {:d}, {:d}'.format(n_frames, s_features_len)
        offset = abs(s_features_len - n_frames) // 2  # integer division: a float offset breaks slicing
        s_video_features = s_video_features[offset:offset + n_frames]
        self.groundtruthscores.append(s_scores)
        self.groundtruth01scores.append(s_frame01scores)
        self.videofeatures.append(s_video_features)
        self.videonames.append(s_filename)
    self.dataset_size = len(self.videofeatures)
    print("{:s}\tEvaluator: {:s}\t{:d} Videos".format(self.dataset_name, self.split, self.dataset_size))
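# --- Hedged sketch: scores -> keyshots ------------------------------------------
# End-to-end illustration of what the convertscores2segs -> selecteTopSegments
# -> keyshots2frame01scores pipeline above presumably computes: turn a per-frame
# score curve into a binary keyshot summary under a length budget (sum_budget
# defaults to 0.15). This greedy version only illustrates the idea; the repo's
# selecteTopSegments may well use a knapsack/DP formulation instead.
def _scores_to_keyshots_sketch(frame_scores, segments, n_frames, budget=0.15):
    # Rank segments by mean frame score; keep adding the best ones while the
    # summary stays within `budget` of the video length.
    seg_scores = [frame_scores[s:e].mean() for s, e in segments]
    order = np.argsort(seg_scores)[::-1]
    frame01 = np.zeros(n_frames)
    kept = 0
    for i in order:
        s, e = segments[i]
        if kept + (e - s) > budget * n_frames:
            continue
        frame01[s:e] = 1
        kept += e - s
    return frame01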
def __init__(self, dataset_name='TVSum', split='train', seq_length=90, overlap=0.9, sample_rate=None,
             feature_file_ext='npy', sum_budget=0.15, train_val_perms=None, eval_metrics='max',
             data_path=None):
    if dataset_name.lower() not in ['summe', 'tvsum']:
        print('Unrecognized dataset {:s}'.format(dataset_name))
        sys.exit(-1)  # exit on bad input, matching the training loader above
    self.dataset_name = dataset_name
    self.eval_metrics = eval_metrics
    self.split = split
    self.sum_budget = sum_budget
    self.feature_file_ext = feature_file_ext
    self.feature_directory = os.path.join(data_path, '%s/features/c3dd-red500' % (dataset_name))
    self.filenames = os.listdir(self.feature_directory)
    self.filenames = [f.split('.', 1)[0] for f in self.filenames]
    self.filenames.sort()
    n_files = len(self.filenames)
    # The 80/20 train/val split is computed by the caller and passed in via train_val_perms.
    self.filenames = [self.filenames[i] for i in train_val_perms]
    if sample_rate is None:
        self.sample_rate = [1, 2, 4]
    else:
        self.sample_rate = sample_rate
    self.seq_len = seq_length
    self.overlap = overlap
    self.videofeatures = []
    self.groundtruthscores = []
    self.combinegroundtruth01scores = []
    self.videonames = []
    KY_dataset_path = os.path.join(data_path, 'KY_AAAI18/datasets')
    Kydataset = KyLoader.loadKyDataset(
        self.dataset_name.lower(),
        file_path=os.path.join(KY_dataset_path,
                               'eccv16_dataset_{:s}_google_pool5.h5'.format(dataset_name.lower())))
    conversion = KyLoader.loadConversion(
        self.dataset_name.lower(),
        file_path=os.path.join(KY_dataset_path,
                               '{:s}_name_conversion.pkl'.format(dataset_name.lower())))
    self.raw2Ky = conversion[0]
    self.Ky2raw = conversion[1]
    # (A previous version rebuilt per-user 0/1 summaries from the raw TVSum
    # annotations in TVSumRaw.pkl; the Ky user_summary matrix is used instead.)
    for s_video_idx, s_filename in enumerate(self.filenames):
        KyKey = self.raw2Ky[s_filename]
        s_scores = Kydataset[KyKey]['user_summary'][...]
        s_scores = s_scores.transpose()
        n_frames = len(s_scores)
        # Combine the per-user summaries by averaging, then convert the averaged
        # score curve into a single binary keyshot annotation.
        ky_combine_summaris = np.mean(s_scores, 1, keepdims=True)
        s_combine_segments, s_combine_segment_scores = LoaderUtils.convertscores2segs(ky_combine_summaris)
        s_combine_selected_segments = rep_conversions.selecteTopSegments(
            s_combine_segments, s_combine_segment_scores, n_frames)
        s_combine_frame01scores = rep_conversions.keyshots2frame01scores(
            s_combine_selected_segments, n_frames)
        # Load features; the size of s_video_features is [length, fea_dim].
        s_video_features = np.load(
            os.path.join(self.feature_directory, '{:s}.{:s}'.format(s_filename, self.feature_file_ext)))
        s_features_len = len(s_video_features)
        # The C3D feature sequence can be slightly longer than the annotation;
        # crop the middle so the two lengths match.
        assert abs(n_frames - s_features_len) < 6, \
            'annotation and feature length not equal! {:d}, {:d}'.format(n_frames, s_features_len)
        offset = abs(s_features_len - n_frames) // 2  # integer division: a float offset breaks slicing
        s_video_features = s_video_features[offset:offset + n_frames]
        self.groundtruthscores.append(s_scores)
        self.videofeatures.append(s_video_features)
        self.combinegroundtruth01scores.append(s_combine_frame01scores)
        self.videonames.append(s_filename)
    self.dataset_size = len(self.videofeatures)
    print("{:s}\tEvaluator: {:s}\t{:d} Videos".format(self.dataset_name, self.split, self.dataset_size))
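# --- Hedged sketch: shared sliding-window enumeration ---------------------------
# The training loader and both evaluators above inline the same windowing logic:
# step through the sequence with stride (1 - overlap) * seq_len and snap the
# final window back so it ends exactly at the last frame. Factored out here
# purely for illustration; the classes above do not call this helper.
def _sliding_windows_sketch(n_frames, seq_len, overlap=0.9):
    windows = []
    start = 0
    in_bound = True
    while start < n_frames and in_bound:
        end = start + seq_len
        if end >= n_frames:  # final window: snap back to cover the sequence end
            in_bound = False
            start = n_frames - seq_len
            end = n_frames
        windows.append((start, end))
        start += int((1 - overlap) * seq_len)
    return windows

# e.g. _sliding_windows_sketch(100, 90, 0.9) -> [(0, 90), (9, 99), (10, 100)]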