def __get_frame_names_from_csv_file(annot_text_path, min_frames_per_video, max_frames_per_video, sampling=True):
    """
    Build a {video_id: frame_names} dict from a Charades annotation csv.

    :param annot_text_path: csv with at least the columns 'id' and 'actions'.
    :param min_frames_per_video: lower bound used by the sampler / stats.
    :param max_frames_per_video: upper bound used by the sampler / stats.
    :param sampling: if True, sub-sample each video's frames via __sample_frames.
    :return: dict {video_id: list of frame names}.
    """
    root_path = c.data_root_path
    counts_before = []
    counts_after = []
    count = 0
    video_frames_dict = dict()
    with open(annot_text_path) as f:
        reader = csv.DictReader(f)
        for row in reader:
            if count % 100 == 0:
                print('%d' % (count))
            count += 1
            action_strings = row['actions']
            video_id = row['id']
            # skip videos that carry no action annotation
            if len(action_strings) == 0:
                continue
            frames_relative_root_path = 'Charades/frames/Charades_v1_rgb/%s' % (video_id)
            frames_root_path = '%s/%s' % (root_path, frames_relative_root_path)
            frame_names = utils.file_names(frames_root_path, nat_sorted=True)
            n_frames = len(frame_names)
            counts_before.append(n_frames)
            # if min/max limits are imposed, sub-sample the frames
            if sampling:
                frame_names = __sample_frames(frame_names, min_frames_per_video, max_frames_per_video)
                n_frames = len(frame_names)
                counts_after.append(n_frames)
            # video id -> its (possibly sampled) frame names
            video_frames_dict[video_id] = frame_names

    def __print_stats(title, values):
        # BUGFIX: guard against an empty list — with sampling=False the
        # original called np.min/np.max on empty counts_after and crashed
        if len(values) == 0:
            print('%s: no data' % (title))
            return
        values = np.array(values)
        print(title)
        print(np.min(values))
        print(np.max(values))
        print(np.average(values))
        print(len(np.where(values < min_frames_per_video)[0]))
        print(len(np.where(values < max_frames_per_video)[0]))

    __print_stats('counts before', counts_before)
    __print_stats('counts after', counts_after)
    return video_frames_dict
def __get_frame_names_untrimmed_from_csv_file_for_i3d(annot_text_path, n_frames_per_video):
    """
    Sample a fixed number of frame names per video for I3D from a Charades csv.

    NOTE(review): a second definition with this exact name appears later in the
    module and shadows this one at import time — confirm which is intended.

    :param annot_text_path: csv with at least the columns 'id' and 'actions'.
    :param n_frames_per_video: number of frames sampled per video.
    :return: dict {video_id: sampled frame names}.
    """
    count = 0
    video_frames_dict = dict()
    # root_path = c.data_root_path
    root_path = '/content/'  # hard-coded for Colab — TODO restore c.data_root_path
    # BUGFIX: count rows for progress reporting without leaking the file handle
    # (original used len(open(path).readlines()) and never closed it)
    with open(annot_text_path) as f:
        n_lines = sum(1 for _ in f)
    with open(annot_text_path) as f:
        reader = csv.DictReader(f)
        for row in reader:
            if count % 100 == 0:
                print('... %d/%d' % (count, n_lines))
            count += 1
            action_strings = row['actions']
            video_id = row['id']
            # some videos don't contain action annotation
            if len(action_strings) == 0:
                continue
            # get all frames of the video
            frames_relative_root_path = 'Charades_v1_rgb/%s' % (video_id)
            frames_root_path = '%s/%s' % (root_path, frames_relative_root_path)
            video_frame_names = utils.file_names(frames_root_path, nat_sorted=True)
            # sample from these frames
            video_frame_names = __sample_frames_for_i3d(video_frame_names, n_frames_per_video)
            n_frames = len(video_frame_names)
            assert n_frames == n_frames_per_video
            video_frames_dict[video_id] = video_frame_names
    return video_frames_dict
def _800_prepare_video_frames_path_dict():
    """Collect the relative frame paths of every EPIC-Kitchens video and pickle
    the resulting {video_id: relative frame paths} mapping."""
    frame_relative_pathes_dict_path = Pth('EPIC-Kitchens/annotations/frame_relative_pathes_dict.pkl')
    video_names_splits_path = Pth('EPIC-Kitchens/annotations/video_names_splits.pkl')
    imgs_root_path = Pth('EPIC-Kitchens/frames_rgb_resized/train')

    video_names_tr, video_names_te = utils.pkl_load(video_names_splits_path)
    all_video_names = np.hstack((video_names_tr, video_names_te))
    total = len(all_video_names)

    frame_relative_pathes_dict = {}
    for idx, vid in enumerate(all_video_names):
        utils.print_counter(idx, total)
        # the video id encodes the person id as its first '_'-separated token
        person = vid.split('_')[0]
        frames_dir = '%s/%s/%s' % (imgs_root_path, person, vid)
        names = np.array(utils.file_names(frames_dir, is_nat_sort=True))
        frame_relative_pathes_dict[vid] = np.array(['%s/%s/%s' % (person, vid, n) for n in names])

    utils.pkl_dump(frame_relative_pathes_dict, frame_relative_pathes_dict_path)
def __sample_frames(video_relative_pathes, n_frames_per_video, model_type):
    """
    Sample exactly n_frames_per_video frame names per video, using a
    model-specific sampling strategy.

    :param video_relative_pathes: iterable of relative video paths.
    :param n_frames_per_video: number of frames to sample for each video.
    :param model_type: one of 'resnet', 'i3d', 'non_local'.
    :return: dict {video_id: sampled frame names}.
    """
    # dispatch table replaces the if/elif chain; this also removes the
    # unreachable else branch and its misspelled 'Unkonwn' error message
    samplers = {
        'resnet': __sample_frames_for_resnet,
        'i3d': __sample_frames_for_i3d,
        'non_local': __sample_frames_for_non_local,
    }
    assert model_type in samplers, 'Unknown model type: %s' % (model_type)
    sampler = samplers[model_type]

    video_frames_dict = dict()
    n_videos = len(video_relative_pathes)
    for idx_video, video_relative_path in enumerate(video_relative_pathes):
        utils.print_counter(idx_video, n_videos, 100)
        video_id = __video_relative_path_to_video_id(video_relative_path)
        # get all frames of the video
        frames_root_path = Pth('Breakfast/frames/%s', (video_id,))
        video_frame_names = utils.file_names(frames_root_path, is_nat_sort=True)
        # sample from these frames
        video_frame_names = sampler(video_frame_names, n_frames_per_video)
        assert len(video_frame_names) == n_frames_per_video
        video_frames_dict[video_id] = video_frame_names
    return video_frames_dict
def __get_frames_names_in_given_duration(video_id, start_time_in_sec, stop_time_in_sec):
    """
    Return the frame names of the given video that fall between the given
    start and stop times (both endpoints included).
    """
    frames_dir = '%s/%s' % (c.data_root_path, 'Charades/frames/Charades_v1_rgb/%s' % (video_id))
    all_names = utils.file_names(frames_dir, nat_sorted=True)
    first_idx = __convert_seconds_to_frame_idx(start_time_in_sec)
    last_idx = __convert_seconds_to_frame_idx(stop_time_in_sec)
    return all_names[first_idx:last_idx + 1]
def __get_frame_names_untrimmed_from_csv_file_for_ordered(annot_text_path, n_frames_per_video, is_resnet=False):
    """
    Sample a fixed number of (ordered) frame names per video from a Charades csv.

    :param annot_text_path: csv with at least the columns 'id' and 'actions'.
    :param n_frames_per_video: number of frames sampled per video.
    :param is_resnet: use the resnet-specific ordered sampler if True.
    :return: dict {video_id: sampled frame names}.
    """
    counts = []
    count = 0
    video_frames_dict = dict()
    root_path = c.data_root_path
    # BUGFIX: count rows for progress reporting without leaking the file handle
    # (original used len(open(path).readlines()) and never closed it)
    with open(annot_text_path) as f:
        n_lines = sum(1 for _ in f)
    with open(annot_text_path) as f:
        reader = csv.DictReader(f)
        for row in reader:
            if count % 100 == 0:
                print('... %d/%d' % (count, n_lines))
            count += 1
            action_strings = row['actions']
            # if no action in the current video
            if len(action_strings) == 0:
                continue
            video_id = row['id']
            frames_root_path = '%s/charades/frames/Charades_v1_rgb/%s' % (root_path, video_id)
            video_frame_names = utils.file_names(frames_root_path, nat_sorted=True)
            if is_resnet:
                video_frame_names = __sample_frames_ordered_for_resnet(video_frame_names, n_frames_per_video)
            else:
                video_frame_names = __sample_frames_ordered(video_frame_names, n_frames_per_video)
            n_frames = len(video_frame_names)
            assert n_frames == n_frames_per_video
            counts.append(n_frames)
            # sample from these frames
            video_frames_dict[video_id] = video_frame_names
    # BUGFIX: np.min/np.max raise on an empty array (no annotated videos)
    if len(counts) > 0:
        counts = np.array(counts)
        print('counts before')
        print(np.min(counts))
        print(np.max(counts))
        print(np.average(counts))
        print(len(np.where(counts < n_frames_per_video)[0]))
    return video_frames_dict
def __get_frame_names_untrimmed_from_csv_file_for_i3d(annot_text_path, n_frames_per_video):
    '''
    Sample a fixed number of frame names per video for I3D.

    NOTE(review): this redefines (and therefore shadows) an earlier function
    of the same name in this module — confirm which version is intended.

    :param annot_text_path: './data/Charades/annotation/Charades_v1_train.csv'
    :param n_frames_per_video: 256
    :return: dict of sampled frame names, keyed by video id
    '''
    count = 0
    video_frames_dict = dict()
    # BUGFIX: count rows for progress reporting without leaking the file handle
    # (original used len(open(path).readlines()) and never closed it)
    with open(annot_text_path) as f:
        n_lines = sum(1 for _ in f)
    with open(annot_text_path) as f:
        reader = csv.DictReader(f)
        for row in reader:
            if count % 100 == 0:
                print('... %d/%d' % (count, n_lines))
            count += 1
            action_strings = row['actions']  # action/time annotation of the video
            video_id = row['id']  # video id
            # some videos don't contain action annotation
            if len(action_strings) == 0:
                continue
            # directory holding the real frame images of this video
            # HACK: absolute machine-specific path — TODO use c.data_root_path
            frames_relative_root_path = '/home/r/renpengzhen/Datasets/Charades/Charades_v1_rgb/%s' % (video_id)
            # frames_root_path = '%s/%s' % (root_path, frames_relative_root_path)
            # names of all frame images of the video
            video_frame_names = utils.file_names(frames_relative_root_path, is_nat_sort=True)
            # sample from these frames
            video_frame_names = __sample_frames_for_i3d(video_frame_names, n_frames_per_video)
            n_frames = len(video_frame_names)
            assert n_frames == n_frames_per_video
            # store the sampled frame names in the dict
            video_frames_dict[video_id] = video_frame_names
    return video_frames_dict
def __count_time_in_each_video(is_training=True):
    """
    Print statistics (count/sum/avg/min/max) of per-instance frame counts,
    per-video frame counts and per-video action durations for Charades.

    :param is_training: read the train csv if True, else the test csv.
    """
    root_path = c.data_root_path
    annot_tr_text_path = '%s/Charades/annotation/Charades_v1_train.csv' % (root_path)
    annot_te_text_path = '%s/Charades/annotation/Charades_v1_test.csv' % (root_path)
    annot_text_path = annot_tr_text_path if is_training else annot_te_text_path
    frames_per_instance = []
    frames_per_videos = []
    time_per_videos = []
    count = 0
    with open(annot_text_path) as f:
        reader = csv.DictReader(f)
        for row in reader:
            if count % 100 == 0:
                print('%d' % (count))
            count += 1
            action_strings = row['actions']
            action_strings_splits = action_strings.split(';')
            video_id = row['id']
            if len(action_strings) == 0:
                print('... no action for video %s' % (video_id))
                continue
            frames_relative_root_path = 'Charades/frames/Charades_v1_rgb/%s' % (video_id)
            frames_root_path = '%s/%s' % (root_path, frames_relative_root_path)
            frame_names = utils.file_names(frames_root_path, nat_sorted=True)
            frames_per_video = 0
            time_per_video = 0
            for action_st in action_strings_splits:
                # each annotation is '<class> <start_sec> <end_sec>'
                action_splits = action_st.split(' ')
                action_start = action_splits[1]
                action_end = action_splits[2]
                action_time = float(action_end) - float(action_start)
                time_per_video += action_time
                idx_start = __convert_seconds_to_frame_idx(action_start)
                idx_stop = __convert_seconds_to_frame_idx(action_end)
                # BUGFIX: slice into a fresh variable — the original overwrote
                # frame_names, so every subsequent action of the same video
                # sliced the already-truncated list instead of the full list
                instance_frame_names = frame_names[idx_start:idx_stop + 1]
                n_frames_per_instance = len(instance_frame_names)
                frames_per_video += n_frames_per_instance
                frames_per_instance.append(n_frames_per_instance)
            time_per_videos.append(time_per_video)
            frames_per_videos.append(frames_per_video)
    # same stats, same order, for each of the three collected series
    for values in (frames_per_instance, frames_per_videos, time_per_videos):
        print(values)
        print(len(values))
        print(np.sum(values))
        print(np.average(values))
        print(np.min(values))
        print(np.max(values))
    print(count)
def _101_prepare_action_ids():
    """
    Get list of all unit-actions and activities, and dump them both as pickle
    and as text files.
    :return:
    """
    video_types = ['cam', 'webcam', 'stereo']
    videos_root_path = Pth('Breakfast/videos')
    unit_actions_path = Pth('Breakfast/annotation/unit_actions_list.pkl')
    activities_path = Pth('Breakfast/annotation/activities_list.pkl')
    person_names = utils.folder_names(videos_root_path, is_nat_sort=True)
    unit_actions = []
    activities = []
    # loop on persons
    for person_name in person_names:
        p_video_root_path = '%s/%s' % (videos_root_path, person_name)
        p_video_types = [n for n in utils.folder_names(p_video_root_path) if __check_correct_video_type(video_types, n)]
        p_video_types = np.array(p_video_types)
        # loop on videos for each person
        for p_video_type in p_video_types:
            # get file names
            instance_video_root_path = '%s/%s' % (p_video_root_path, p_video_type)
            instance_video_names = utils.file_names(instance_video_root_path, is_nat_sort=True)
            # if stereo videos, consider only the first channel
            instance_video_names = [n for n in instance_video_names if utils.get_file_extension(n) == 'avi' and ('stereo' not in p_video_type or 'ch0' in n)]
            # append relative pathes of videos
            instance_video_relative_pathes = ['Breakfast/videos/%s/%s/%s' % (person_name, p_video_type, n) for n in instance_video_names]
            # also, get ground truth for unit-actions and activities
            instance_annot_file_pathes = ['%s/%s.txt' % (instance_video_root_path, utils.remove_extension(n)) for n in instance_video_names]
            instance_unit_actions = __get_action_names_from_files(instance_annot_file_pathes)
            # the activity name is the 2nd '_'-separated token of the file name
            instance_activities = [utils.remove_extension(n).split('_')[1] for n in instance_video_relative_pathes]
            unit_actions += instance_unit_actions
            activities += instance_activities
    activities = np.unique(activities)
    activities = natsort.natsorted(activities)
    activities = np.array(activities)
    unit_actions = np.unique(unit_actions)
    unit_actions = natsort.natsorted(unit_actions)
    unit_actions = np.array(unit_actions)
    # BUGFIX: original used python-2 print statements ("print x"), which are a
    # syntax error under python 3; these forms behave the same in both
    print('%d %d' % (len(activities), len(unit_actions)))
    print(activities)
    print(unit_actions)
    utils.pkl_dump(unit_actions, unit_actions_path)
    utils.pkl_dump(activities, activities_path)
    unit_actions_path = Pth('Breakfast/annotation/unit_actions_list.txt')
    activities_path = Pth('Breakfast/annotation/activities_list.txt')
    utils.txt_dump(unit_actions, unit_actions_path)
    utils.txt_dump(activities, activities_path)
def _102_prepare_video_annot():
    """
    Check ground truth of each video, split videos per-person into train/test,
    and dump the (paths, labels) annotation tuples.
    :return:
    """
    video_types = ['cam', 'webcam', 'stereo']
    videos_root_path = Pth('Breakfast/videos')
    unit_actions_path = Pth('Breakfast/annotation/unit_actions_list.pkl')
    activities_path = Pth('Breakfast/annotation/activities_list.pkl')
    annot_unit_actions_path = Pth('Breakfast/annotation/annot_unit_actions.pkl')
    annot_activities_path = Pth('Breakfast/annotation/annot_activities.pkl')
    unit_actions = utils.pkl_load(unit_actions_path)
    activities = utils.pkl_load(activities_path)
    person_names = utils.folder_names(videos_root_path, is_nat_sort=True)
    split_ratio = 0.85
    video_relative_pathes_tr = []
    video_relative_pathes_te = []
    y_unit_actions_tr = []
    y_unit_actions_te = []
    y_activities_tr = []
    y_activities_te = []
    n_persons = len(person_names)
    # split by person, not by video, so a person never spans both splits
    n_persons_tr = int(n_persons * split_ratio)
    person_names_tr = person_names[:n_persons_tr]
    # loop on persons
    for person_name in person_names:
        p_video_root_path = '%s/%s' % (videos_root_path, person_name)
        p_video_types = [n for n in utils.folder_names(p_video_root_path) if __check_correct_video_type(video_types, n)]
        p_video_types = np.array(p_video_types)
        # loop on videos for each person
        for p_video_type in p_video_types:
            # get file names
            instance_video_root_path = '%s/%s' % (p_video_root_path, p_video_type)
            instance_video_names = utils.file_names(instance_video_root_path, is_nat_sort=True)
            # if stereo videos, consider only the first channel
            instance_video_names = [n for n in instance_video_names if utils.get_file_extension(n) == 'avi' and ('stereo' not in p_video_type or 'ch0' in n)]
            # append relative pathes of videos
            instance_video_relative_pathes = ['%s/%s/%s' % (person_name, p_video_type, n) for n in instance_video_names]
            # also, get ground truth for unit-actions and activities
            instance_activities_y, instance_unit_actions_y = __get_gt_activities_and_actions(instance_video_root_path, instance_video_names, activities, unit_actions)
            if person_name in person_names_tr:
                video_relative_pathes_tr += instance_video_relative_pathes
                y_unit_actions_tr += instance_unit_actions_y
                y_activities_tr += instance_activities_y
            else:
                video_relative_pathes_te += instance_video_relative_pathes
                y_unit_actions_te += instance_unit_actions_y
                y_activities_te += instance_activities_y
    video_relative_pathes_tr = np.array(video_relative_pathes_tr)
    video_relative_pathes_te = np.array(video_relative_pathes_te)
    y_activities_tr = np.array(y_activities_tr)
    y_activities_te = np.array(y_activities_te)
    y_unit_actions_tr = np.array(y_unit_actions_tr)
    y_unit_actions_te = np.array(y_unit_actions_te)
    # BUGFIX: original used python-2 print statements ("print x"), which are a
    # syntax error under python 3
    print(video_relative_pathes_tr.shape)
    print(video_relative_pathes_te.shape)
    print(y_activities_tr.shape)
    print(y_activities_te.shape)
    print(y_unit_actions_tr.shape)
    print(y_unit_actions_te.shape)
    # finally, save video annotation ()
    annot_unit_action = (video_relative_pathes_tr, y_unit_actions_tr, video_relative_pathes_te, y_unit_actions_te)
    annot_activities = (video_relative_pathes_tr, y_activities_tr, video_relative_pathes_te, y_activities_te)
    utils.pkl_dump(annot_unit_action, annot_unit_actions_path)
    utils.pkl_dump(annot_activities, annot_activities_path)
    return