def __getitem__(self, index):
    metainfo = GView(self.get_metainfo(index))
    feed_dict = GView()

    # scene annotations
    if self.incl_scene:
        feed_dict.scene = metainfo.scene
        feed_dict.update(gdef.annotate_objects(metainfo.scene))
        feed_dict.objects_raw = feed_dict.objects.copy()
        feed_dict.update(gdef.annotate_scene(metainfo.scene))

    # image
    feed_dict.image_index = metainfo.image_index
    feed_dict.image_filename = metainfo.image_filename

    # video folder
    feed_dict.video_folder = metainfo.video_folder
    feed_dict.video = []
    if self.image_root is not None:
        feed_dict.image = Image.open(
            osp.join(self.image_root, feed_dict.image_filename)).convert("RGB")
        feed_dict.image, feed_dict.objects = self.image_transform(
            feed_dict.image, feed_dict.objects)

        # collect the raw video frames; sort the glob for a deterministic temporal order
        import glob
        for name in sorted(glob.glob(
                osp.join(self.image_root, feed_dict.video_folder) + "/*.png")):
            feed_dict.video += [Image.open(name).convert("RGB")]

    return feed_dict.raw()
def __getitem__(self, index):
    metainfo = GView(self.get_metainfo(index))
    feed_dict = GView()

    # scene annotations
    if self.incl_scene:
        feed_dict.scene = metainfo.scene
        feed_dict.update(gdef.annotate_objects(metainfo.scene))
        feed_dict.objects_raw = feed_dict.objects.copy()
        feed_dict.update(gdef.annotate_scene(metainfo.scene))

    # image
    feed_dict.image_index = metainfo.image_index
    feed_dict.image_filename = metainfo.image_filename
    if self.image_root is not None:
        feed_dict.image = Image.open(
            osp.join(self.image_root, feed_dict.image_filename)).convert('RGB')
        feed_dict.image, feed_dict.objects = self.image_transform(
            feed_dict.image, feed_dict.objects)

    # program
    feed_dict.program_raw = metainfo.program_raw
    feed_dict.program_seq = metainfo.program_seq
    feed_dict.program_tree = metainfo.program_tree
    feed_dict.program_qsseq = metainfo.program_qsseq
    feed_dict.program_qstree = metainfo.program_qstree
    feed_dict.question_type = metainfo.question_type

    # question
    feed_dict.answer = True

    return feed_dict.raw()
def __getitem__(self, index):
    metainfo = GView(self.get_metainfo(index))
    feed_dict = GView()

    # scene annotations
    if self.incl_scene:
        feed_dict.scene = metainfo.scene
        feed_dict.update(gdef.annotate_objects(metainfo.scene))
        feed_dict.objects_raw = feed_dict.objects.copy()
        feed_dict.update(gdef.annotate_scene(metainfo.scene))

    # image
    feed_dict.image_index = metainfo.image_index
    feed_dict.image_filename = metainfo.image_filename

    # video
    feed_dict.video_folder = metainfo.video_folder
    video = []
    original_objects = feed_dict.objects
    if self.image_root is not None:
        feed_dict.image = Image.open(
            osp.join(self.image_root, feed_dict.image_filename)).convert("RGB")
        feed_dict.image, feed_dict.objects = self.image_transform(
            feed_dict.image, feed_dict.objects)
    if self.image_root is not None and feed_dict.video_folder is not None:
        import glob
        # sort the glob for a deterministic temporal order of the frames
        for name in sorted(glob.glob(
                osp.join(self.image_root, feed_dict.video_folder) + "/*.png")):
            image = Image.open(name).convert("RGB")
            image, _ = self.image_transform(image, original_objects)
            video += [image]
        # NB: torch.cat fuses the frames along the channel dimension;
        # the later variant uses torch.stack to keep a separate time axis.
        feed_dict.video = torch.cat(video)

    # program
    feed_dict.program_raw = metainfo.program_raw
    feed_dict.program_seq = metainfo.program_seq
    feed_dict.program_tree = metainfo.program_tree
    feed_dict.program_qsseq = metainfo.program_qsseq
    feed_dict.program_qstree = metainfo.program_qstree
    feed_dict.question_type = metainfo.question_type

    # question
    feed_dict.answer = True

    return feed_dict.raw()
def __getitem__(self, index):
    metainfo = GView(self.get_metainfo(index))
    feed_dict = GView()

    # scene annotations
    if self.incl_scene:
        feed_dict.scene = metainfo.scene
        feed_dict.update(gdef.annotate_objects(metainfo.scene))
        feed_dict.objects_raw = feed_dict.objects.copy()
        feed_dict.update(gdef.annotate_scene(metainfo.scene))

    # image
    feed_dict.image_index = metainfo.image_index
    feed_dict.image_filename = metainfo.image_filename
    if self.image_root is not None:
        feed_dict.image = Image.open(
            osp.join(self.image_root, feed_dict.image_filename)).convert('RGB')
        feed_dict.image, feed_dict.objects = self.image_transform(
            feed_dict.image, feed_dict.objects)

    return feed_dict.raw()
def __getitem__(self, index):
    # index = index % 200
    metainfo = GView(self.get_metainfo(index))
    metainfo.view_id = 1
    feed_dict = GView()

    feed_dict.scene = metainfo.scene
    feed_dict.attribute_name = "shape"
    feed_dict.concept_name = metainfo.scene["objects"][0][feed_dict.attribute_name]

    if self.incl_scene:
        feed_dict.scene = metainfo.scene
        feed_dict.update(gdef.annotate_objects(metainfo.scene))
        if "objects" in feed_dict:
            # NB(Jiayuan Mao): in some datasets, object information might be completely unavailable.
            feed_dict.objects_raw = feed_dict.objects.copy()
        feed_dict.update(gdef.annotate_scene(metainfo.scene))

    # image
    feed_dict.image_index = metainfo.image_index
    feed_dict.image_filename = metainfo.image_filename
    if self.image_root is not None and feed_dict.image_filename is not None:
        feed_dict.image = Image.open(
            osp.join(self.image_root, feed_dict.image_filename)).convert("RGB")
        feed_dict.image, feed_dict.objects = self.image_transform(
            feed_dict.image, feed_dict.objects)
    if self.depth_root is not None and feed_dict.image_filename is not None:
        depth_filename = feed_dict.image_filename.split(".")[0] + ".exr"
        feed_dict.depth = torch.tensor(
            load_depth(osp.join(self.depth_root, depth_filename)))

    # program

    # Scene
    # feed_dict.bboxes = torch.tensor(feed_dict.scene["obj_bboxes"][0]).reshape(-1, 9)
    # # feed_dict.bboxes_len = torch.tensor(feed_dict.bboxes.size(0))
    # feed_dict.pix_T_cam = torch.tensor(metainfo.scene["pix_T_cams"]).float()
    # feed_dict.origin_T_cam = torch.tensor(
    #     metainfo.scene["origin_T_cams"][metainfo.view_id]
    # ).float()

    return feed_dict.raw()
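# Added sketch (not from the original code): `load_depth` is defined elsewhere in the
# repository; this is one hypothetical way a single-channel .exr depth map could be
# read with OpenCV, shown only to make the depth branch above concrete.
import os
os.environ.setdefault("OPENCV_IO_ENABLE_OPENEXR", "1")  # recent OpenCV builds require this to read EXR
import cv2
import numpy as np

def load_depth_sketch(path):
    depth = cv2.imread(path, cv2.IMREAD_ANYDEPTH | cv2.IMREAD_ANYCOLOR)
    if depth is None:
        raise FileNotFoundError(path)
    if depth.ndim == 3:
        # some EXR exporters replicate depth across three channels; keep one
        depth = depth[..., 0]
    return depth.astype(np.float32)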
def __getitem__(self, index): metainfo = GView(self.get_metainfo(index)) feed_dict = GView() # metainfo annotations if self.incl_scene: feed_dict.scene = metainfo.scene feed_dict.update(gdef.annotate_objects(metainfo.scene)) if "objects" in feed_dict: # NB(Jiayuan Mao): in some datasets, object information might be completely unavailable. feed_dict.objects_raw = feed_dict.objects.copy() feed_dict.update(gdef.annotate_scene(metainfo.scene)) # image feed_dict.image_index = metainfo.image_index feed_dict.image_filename = metainfo.image_filename if self.image_root is not None and feed_dict.image_filename is not None: feed_dict.image = Image.open( osp.join(self.image_root, feed_dict.image_filename) ).convert("RGB") feed_dict.image, feed_dict.objects = self.image_transform( feed_dict.image, feed_dict.objects ) if self.depth_root is not None and feed_dict.image_filename is not None: depth_filename = feed_dict.image_filename.split(".")[0] + ".exr" feed_dict.depth = torch.tensor( load_depth(osp.join(self.depth_root, depth_filename)) ) # program if "program_raw" in metainfo: feed_dict.program_raw = metainfo.program_raw feed_dict.program_seq = metainfo.program_seq feed_dict.program_tree = metainfo.program_tree feed_dict.program_qsseq = metainfo.program_qsseq feed_dict.program_qstree = metainfo.program_qstree feed_dict.question_type = metainfo.question_type # question feed_dict.question_index = metainfo.question_index feed_dict.question_raw = metainfo.question feed_dict.question_raw_tokenized = metainfo.question_tokenized feed_dict.question_metainfo = gdef.annotate_question_metainfo(metainfo) feed_dict.question = metainfo.question_tokenized feed_dict.answer = gdef.canonize_answer(metainfo.answer, metainfo.question_type) feed_dict.update(gdef.annotate_question(metainfo)) if self.question_transform is not None: self.question_transform(feed_dict) feed_dict.question = np.array( self.vocab.map_sequence(feed_dict.question), dtype="int64" ) return feed_dict.raw()
def __getitem__(self, index): metainfo = GView(self.get_metainfo(index)) feed_dict = GView() # metainfo annotations if self.incl_scene: feed_dict.scene = metainfo.scene feed_dict.update(gdef.annotate_objects(metainfo.scene)) if "objects" in feed_dict: # NB(Jiayuan Mao): in some datasets, object information might be completely unavailable. feed_dict.objects_raw = feed_dict.objects.copy() feed_dict.update(gdef.annotate_scene(metainfo.scene)) # image feed_dict.image_index = metainfo.image_index feed_dict.image_filename = metainfo.image_filename # video feed_dict.video_folder = metainfo.video_folder video = [] original_objects = feed_dict.objects if self.image_root is not None and feed_dict.image_filename is not None: feed_dict.image = Image.open( osp.join(self.image_root, feed_dict.image_filename)).convert("RGB") feed_dict.image, feed_dict.objects = self.image_transform( feed_dict.image, feed_dict.objects) # print("Image:", feed_dict.image.shape) # print(feed_dict.objects) if self.image_root is not None and feed_dict.video_folder is not None: import glob for name in glob.glob( osp.join(self.image_root, feed_dict.video_folder) + "/*.png"): image = Image.open(name).convert("RGB") image, _ = self.image_transform(image, original_objects) video += [image] feed_dict.video = torch.stack(video) # Tensor # print("Video:", feed_dict.video.shape) # program if "program_raw" in metainfo: feed_dict.program_raw = metainfo.program_raw feed_dict.program_seq = metainfo.program_seq feed_dict.program_tree = metainfo.program_tree feed_dict.program_qsseq = metainfo.program_qsseq feed_dict.program_qstree = metainfo.program_qstree feed_dict.question_type = metainfo.question_type # question feed_dict.question_index = metainfo.question_index feed_dict.question_raw = metainfo.question feed_dict.question_raw_tokenized = metainfo.question_tokenized feed_dict.question_metainfo = gdef.annotate_question_metainfo(metainfo) feed_dict.question = metainfo.question_tokenized feed_dict.answer = gdef.canonize_answer(metainfo.answer, metainfo.question_type) feed_dict.update(gdef.annotate_question(metainfo)) if self.question_transform is not None: self.question_transform(feed_dict) feed_dict.question = np.array(self.vocab.map_sequence( feed_dict.question), dtype="int64") return feed_dict.raw()
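# Added sketch (not from the original code): a minimal, self-contained illustration of
# the design choice between the two video-assembly variants above. With per-frame
# tensors of shape (C, H, W) produced by image_transform, torch.stack keeps a separate
# time axis, while torch.cat fuses all frames into the channel dimension. The frame
# count and spatial size below are illustrative assumptions.
import torch

frames = [torch.zeros(3, 224, 224) for _ in range(16)]  # 16 transformed RGB frames
stacked = torch.stack(frames)     # shape (16, 3, 224, 224): (T, C, H, W), as in the torch.stack variant
concatenated = torch.cat(frames)  # shape (48, 224, 224): time folded into channels, as in the torch.cat variant
assert stacked.shape == (16, 3, 224, 224)
assert concatenated.shape == (48, 224, 224)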
class ForwardContext(object):
    def __init__(self, training, *, loss=0, monitors=None, output_dict=None):
        self.training = training
        self.loss = loss
        self.monitors = GView(monitors)
        self.output_dict = GView(output_dict)
        self.hyperparameters = dict()

    def set_hyperparameter(self, key, value):
        self.hyperparameters[key] = value

    def get_hyperparameter(self, key, default=None):
        # dict.get does not accept a `default` keyword argument; pass it positionally.
        return self.hyperparameters.get(key, default)

    def add_loss(self, loss, key=None, accumulate=True):
        if float(accumulate) > 0:
            self.loss = self.loss + loss * float(accumulate)
        if key is not None:
            if f'loss/{key}' in self.monitors:
                self.monitors[f'loss/{key}'] += float(loss)
            else:
                self.monitors[f'loss/{key}'] = float(loss)
        return self

    def add_accuracy(self, accuracy, key):
        self.monitors[f'accuracy/{key}'] = float(accuracy)
        return self

    def add_output(self, output, key):
        self.output_dict[key] = output
        return self

    def update_monitors(self, monitors):
        self.monitors.update(monitors)
        return self

    def update_mo(self, monitors, output_dict):
        self.monitors.update(monitors)
        self.output_dict.update(output_dict)
        return self

    binary_classification_accuracy = _wrap_monitor_function(monitor.binary_classification_accuracy)
    classification_accuracy = _wrap_monitor_function(monitor.classification_accuracy)
    regression_accuracy = _wrap_monitor_function(monitor.regression_accuracy)
    monitor_rms = _wrap_monitor_function(monitor.monitor_rms)
    monitor_param_saturation = _wrap_monitor_function(monitor.monitor_param_saturation)
    monitor_param_rms = _wrap_monitor_function(monitor.monitor_param_rms)
    monitor_param_gradrms = _wrap_monitor_function(monitor.monitor_param_gradrms)
    monitor_param_gradrms_ratio = _wrap_monitor_function(monitor.monitor_param_gradrms_ratio)

    @wrap_custom_as_default(is_local=True)
    def as_default(self) -> 'ForwardContext':
        yield self

    def finalize(self):
        if self.training:
            return self.loss, self.monitors, self.output_dict
        else:
            self.output_dict.monitors = self.monitors
            return self.output_dict
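# Added usage sketch (not from the original module): how a forward pass might drive
# ForwardContext. Only methods defined above are used; the loss/accuracy computation
# and the tensor shapes are illustrative assumptions.
import torch
import torch.nn.functional as F

def forward_step(logits, labels, training=True):
    ctx = ForwardContext(training)
    ctx.add_loss(F.cross_entropy(logits, labels), key='cls')  # accumulates into ctx.loss and monitors['loss/cls']
    acc = (logits.argmax(dim=-1) == labels).float().mean()
    ctx.add_accuracy(acc, 'cls')                              # recorded as monitors['accuracy/cls']
    ctx.add_output(logits.softmax(dim=-1), 'probs')
    return ctx.finalize()                                     # (loss, monitors, outputs) in training, outputs otherwise

# Example invocation with random data:
loss, monitors, outputs = forward_step(torch.randn(8, 10), torch.randint(0, 10, (8,)))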