def __init__(self, root, json_file_input, json_file_labels, clip_size, nclips, step_size, is_val, transform_pre=None, transform_post=None, augmentation_mappings_json=None, augmentation_types_todo=None, get_item_id=False, is_test=False):
    """Build a WebmDataset-backed video dataset wrapper.

    Loads the annotation JSON via WebmDataset, keeps the clip-sampling
    parameters (clip_size, nclips, step_size), and sets up the pre/post
    transforms plus the frame augmentor.
    """
    # Parse the annotation files once; everything else is derived from it.
    dataset = WebmDataset(json_file_input, json_file_labels, root, is_test=is_test)
    self.dataset_object = dataset
    self.json_data = dataset.json_data
    self.classes = dataset.classes
    self.classes_dict = dataset.classes_dict

    self.root = root
    self.is_val = is_val
    self.get_item_id = get_item_id

    # Per-frame transforms applied before/after augmentation.
    self.transform_pre = transform_pre
    self.transform_post = transform_post
    self.augmentor = Augmentor(augmentation_mappings_json, augmentation_types_todo)

    # Temporal sampling configuration.
    self.clip_size = clip_size
    self.nclips = nclips
    self.step_size = step_size
def __init__(self, root, json_file_input, json_file_labels, clip_size, nclips, step_size, is_val, transform_pre=None, transform_post=None, augmentation_mappings_json=None, augmentation_types_todo=None, get_item_id=False, is_test=False):
    """Build a UIUC-video-backed dataset wrapper.

    NOTE(review): the data root '/data-ssd1' is hard-coded and several
    parameters (root, json_file_input, json_file_labels, is_test) are
    accepted only for signature compatibility and ignored — confirm this
    is intentional.
    """
    # Training split when not validating; hard-coded storage location.
    dataset = UiucVideoV1('/data-ssd1', not is_val)
    self.uiuc = dataset
    # Class indices are 0..num_actions-1, derived from the label distribution.
    num_actions = len(list(dataset.action_label_dist))
    self.classes = list(range(num_actions))

    # Per-frame transforms applied before/after augmentation.
    self.transform_pre = transform_pre
    self.transform_post = transform_post
    self.augmentor = Augmentor(augmentation_mappings_json, augmentation_types_todo)

    # Temporal sampling configuration.
    self.clip_size = clip_size
    self.nclips = nclips
    self.step_size = step_size

    self.is_val = is_val
    self.get_item_id = get_item_id
def __init__(self, args, root, json_file_input, json_file_labels, clip_size, nclips, step_size, is_val, num_tasks=174, transform_pre=None, transform_post=None, augmentation_mappings_json=None, augmentation_types_todo=None, is_test=False, robot_demo_transform=None):
    """Build a dataset for similarity/triplet training over human videos,
    optionally mixed with robot demonstrations.

    Loads annotations via WebmDataset, filters the class list, counts
    per-class occurrences (dumped to a log file), and indexes videos by
    label for triplet sampling. Robot demos are split out by a sentinel id.
    """
    self.num_tasks = num_tasks
    self.is_val = is_val
    # Parse annotation JSON; exposes json_data / classes / classes_dict.
    self.dataset_object = WebmDataset(args, json_file_input, json_file_labels, root, num_tasks=self.num_tasks, is_test=is_test, is_val=is_val)
    self.json_data = self.dataset_object.json_data
    self.classes = self.dataset_object.classes
    self.classes_dict = self.dataset_object.classes_dict
    self.root = root
    # Per-frame transforms applied before/after augmentation.
    self.transform_pre = transform_pre
    self.transform_post = transform_post
    self.im_size = args.im_size
    self.batch_size = args.batch_size
    self.augmentor = Augmentor(augmentation_mappings_json, augmentation_types_todo)
    # Temporal sampling configuration (clip_size doubles as trajectory length).
    self.traj_length = clip_size
    self.nclips = nclips
    self.step_size = step_size
    self.similarity = args.similarity
    self.add_demos = args.add_demos
    if self.add_demos:
        # Transform applied only to robot demonstration clips.
        self.robot_demo_transform = robot_demo_transform
        self.demo_batch_val = args.demo_batch_val
    # Keep only the non-integer keys of classes_dict as class labels
    # (the dict presumably maps both label->index and index->label;
    # the int keys are the reverse direction — TODO confirm).
    classes = []
    for key in self.classes_dict.keys():
        if not isinstance(key, int):
            classes.append(key)
    self.classes = classes
    # Count how many videos carry each class label.
    num_occur = defaultdict(int)
    for c in self.classes:
        for video in self.json_data:
            if video.label == c:
                num_occur[c] += 1
    # Dump the per-class counts for this split to the log directory.
    if not self.is_val:
        with open(args.log_dir + '/human_data_tasks.txt', 'w') as f:
            json.dump(num_occur, f, indent=2)
    else:
        with open(args.log_dir + '/val_human_data_tasks.txt', 'w') as f:
            json.dump(num_occur, f, indent=2)
    # Every sample in batch: anchor (randomly selected class A), positive (randomly selected class A),
    # and negative (randomly selected class not A)
    # Make dictionary for similarity triplets
    self.json_dict = defaultdict(list)
    for data in self.json_data:
        self.json_dict[data.label].append(data)
    # Make separate robot dictionary:
    self.robot_json_dict = defaultdict(list)
    self.total_robot = []  # all robot demos
    for data in self.json_data:
        # id == 300000 is the sentinel marking a robot video — TODO confirm.
        if data.id == 300000:  # robot video
            self.robot_json_dict[data.label].append(data)
            self.total_robot.append(data)
    print("Number of human videos: ", len(self.json_data), len(self.classes), "Total:", self.__len__())
    # Tasks used
    self.tasks = args.human_tasks
    if self.add_demos:
        self.robot_tasks = args.robot_tasks
    # Sanity check: every video was counted under exactly one class.
    assert (sum(num_occur.values()) == len(self.json_data))
class VideoFolder(torch.utils.data.Dataset):
    """Dataset of webm videos decoded on the fly with FFmpeg.

    Each item is a (clip_tensor, target_idx) pair — or
    (clip_tensor, target_idx, item_id) when get_item_id is set — where
    clip_tensor is shaped (C, T, H, W) after the permute below.
    """

    def __init__(self, root, json_file_input, json_file_labels, clip_size, nclips, step_size, is_val, transform_pre=None, transform_post=None, augmentation_mappings_json=None, augmentation_types_todo=None, get_item_id=False, is_test=False):
        """Load annotations and store sampling/transform configuration.

        Args:
            root: directory containing the video files.
            json_file_input / json_file_labels: annotation JSON paths.
            clip_size: frames per clip.
            nclips: number of clips per sample; -1 means "use all frames".
            step_size: temporal stride when slicing frames.
            is_val: disables random temporal offset augmentation.
            transform_pre / transform_post: per-frame transforms applied
                before / after the augmentor.
            get_item_id: also return the item id from __getitem__.
        """
        self.dataset_object = WebmDataset(json_file_input, json_file_labels, root, is_test=is_test)
        self.json_data = self.dataset_object.json_data
        self.classes = self.dataset_object.classes
        self.classes_dict = self.dataset_object.classes_dict
        self.root = root
        self.transform_pre = transform_pre
        self.transform_post = transform_post
        self.augmentor = Augmentor(augmentation_mappings_json, augmentation_types_todo)
        self.clip_size = clip_size
        self.nclips = nclips
        self.step_size = step_size
        self.is_val = is_val
        self.get_item_id = get_item_id

    def __getitem__(self, index):
        """Decode, augment, and temporally sample one video."""
        item = self.json_data[index]

        # Jitter the decode framerate as a temporal augmentation.
        framerate_sampled = self.augmentor.jitter_fps(FRAMERATE)
        optional_args = {"-r": "%d" % framerate_sampled}
        duration = self.get_duration(item.path)
        if duration is not None:
            nframes = int(duration * framerate_sampled)
            optional_args["-vframes"] = "%d" % nframes

        # Open video file; on decode failure fall through with whatever
        # frames (possibly none) were read — matches prior best-effort behavior.
        reader = FFmpegReader(item.path, inputdict={}, outputdict=optional_args)
        try:
            imgs = []
            for img in reader.nextFrame():
                imgs.append(img)
        except (RuntimeError, ZeroDivisionError) as exception:
            print('{}: WEBM reader cannot open {}. Empty '
                  'list returned.'.format(type(exception).__name__, item.path))

        imgs = self.transform_pre(imgs)
        imgs, label = self.augmentor(imgs, item.label)
        imgs = self.transform_post(imgs)

        num_frames = len(imgs)
        target_idx = self.classes_dict[label]

        # nclips == -1 means "keep every frame"; otherwise take exactly
        # clip_size * nclips frames at the configured stride.
        if self.nclips > -1:
            num_frames_necessary = self.clip_size * self.nclips * self.step_size
        else:
            num_frames_necessary = num_frames

        offset = 0
        if num_frames_necessary < num_frames:
            # If there are more frames, then sample starting offset.
            diff = (num_frames - num_frames_necessary)
            # temporal augmentation: random start only during training
            if not self.is_val:
                offset = np.random.randint(0, diff)

        imgs = imgs[offset:num_frames_necessary + offset:self.step_size]
        if len(imgs) < (self.clip_size * self.nclips):
            # Pad short videos by repeating the last frame.
            imgs.extend([imgs[-1]] * ((self.clip_size * self.nclips) - len(imgs)))

        # format data to torch: (T, C, H, W) -> (C, T, H, W)
        data = torch.stack(imgs)
        data = data.permute(1, 0, 2, 3)
        if self.get_item_id:
            return (data, target_idx, item.id)
        else:
            return (data, target_idx)

    def __len__(self):
        """Number of annotated videos."""
        return len(self.json_data)

    def get_duration(self, file):
        """Return the playable duration of *file* in seconds via ffprobe.

        Parses the first two float literals (start time and duration) from
        the ffprobe output. NOTE(review): the unpack assumes ffprobe emits
        exactly two such numbers — confirm for all inputs.
        """
        cmd_output = ffprobe(file)
        # FIX: raw string — "\d" in a plain string is an invalid escape
        # sequence (SyntaxWarning on CPython >= 3.12).
        start_time, duration = re.findall(r"\d+\.\d+", str(cmd_output.stdout))
        return float(duration) - float(start_time)