def __init__(self, config, constants):
    """Build the text-only Chaplot model and move it to the GPU if possible.

    Args:
        config: mapping read for "num_actions" and "vocab_size".
        constants: forwarded unchanged to ``AbstractModel.__init__``.
    """
    AbstractModel.__init__(self, config, constants)
    self.none_action = config["num_actions"]

    # Fixed feature-map geometry fed to the text module.
    num_channels = image_height = image_width = 3

    text_module = ChaplotTextModule(
        emb_dim=32,
        hidden_dim=256,
        vocab_size=config["vocab_size"],
        image_height=image_height,
        image_width=image_width,
    )
    self.text_module = text_module
    self.final_module = FinalModule(text_module)

    # Object detection is hard-disabled for this model; the
    # config["do_object_detection"] switch is intentionally bypassed.
    detection_enabled = False
    if not detection_enabled:
        self.object_detection_module = None
    else:
        self.landmark_names = get_all_landmark_names()
        self.object_detection_module = PixelIdentificationModule(
            num_channels=num_channels, num_objects=67)

    if torch.cuda.is_available():
        candidates = (
            self.text_module,
            self.final_module,
            self.object_detection_module,
        )
        for module in candidates:
            if module is not None:
                module.cuda()
def parse(folder_name):
    """Load the turning-angle dataset stored under ``folder_name``.

    The number of examples is taken to be the number of directory entries in
    ``folder_name``; example ``i`` is read from ``<folder_name>/example_<i>``,
    which must contain ``image_0.png`` .. ``image_5.png`` and ``data.json``.

    Args:
        folder_name: path of the dataset root (no trailing separator needed).

    Returns:
        A list of ``(image_concatenate, new_data)`` tuples, where
        ``image_concatenate`` is the six images concatenated along axis 0 and
        ``new_data`` maps a landmark index (position of the JSON key in
        ``get_all_landmark_names()``) to the value returned by
        ``SupervisedLearningDetectTurningAngle.fix_angle``.

    Raises:
        ValueError: if a key of ``data.json`` is not a known landmark name.
    """
    start = time.time()
    landmark_names = get_all_landmark_names()
    num_examples = len(os.listdir(folder_name))

    dataset = []
    for i in range(num_examples):
        example_folder_name = folder_name + "/example_" + str(i)

        # Two axis swaps move the last axis to the front
        # (assumes imread yields height x width x channel -- TODO confirm).
        images = [
            scipy.misc.imread(
                example_folder_name + "/image_" + str(j) + ".png"
            ).swapaxes(1, 2).swapaxes(0, 1)
            for j in range(6)
        ]

        with open(example_folder_name + "/data.json", 'r') as fp:
            data = json.load(fp)

        new_data = {
            landmark_names.index(key):
                SupervisedLearningDetectTurningAngle.fix_angle(value)
            for key, value in data.items()
        }

        image_concatenate = np.concatenate(images, 0)
        dataset.append((image_concatenate, new_data))

    end = time.time()
    # The original format string used "% seconds", which is parsed as a
    # space-flagged "%s" conversion and printed e.g. " 1.5econds".
    logging.info("Parsed dataset of size %r in time %r seconds",
                 len(dataset), (end - start))
    return dataset
def __init__(self, config, constants, image_module_path):
    """Build the image-detection model around an ``ImageRyanResnetModule``.

    Args:
        config: mapping read for "num_actions", "image_height" and
            "image_width".
        constants: mapping read for "image_emb_dim".
        image_module_path: path of a saved state dict for the image module,
            or ``None`` to skip loading.
    """
    self.none_action = config["num_actions"]
    self.landmark_names = get_all_landmark_names()
    self.image_module = ImageRyanResnetModule(
        image_emb_size=constants["image_emb_dim"],
        input_num_channels=3,
        image_height=config["image_height"],
        image_width=config["image_width"],
        using_recurrence=True)
    # TODO: load pre-trained image module from file
    # image_module_path = "path/to/saved/image/module"
    # Without CUDA, remap every stored tensor onto the CPU while loading.
    if torch.cuda.is_available():
        torch_load = torch.load
    else:
        torch_load = lambda f_: torch.load(f_, map_location=lambda s_, l_: s_)
    if image_module_path is not None:
        self.image_module.load_state_dict(torch_load(image_module_path))
    # The detection head receives six image embeddings' worth of features.
    total_emb_size = constants["image_emb_dim"] * 6
    final_module = ImageRyanDetectionModule(image_module=self.image_module,
                                            image_emb_size=total_emb_size)
    self.final_module = final_module
    if torch.cuda.is_available():
        self.image_module.cuda()
        self.final_module.cuda()
    # NOTE(review): init_weights() runs after load_state_dict(); if it
    # re-initialises parameters it would discard the loaded weights -- confirm.
    # NOTE(review): the flattened source does not show whether this call was
    # nested inside the CUDA check above; it is kept at method level here.
    self.image_module.init_weights()
def parse(folder_name):
    """Load the per-action image dataset stored under ``folder_name``.

    The number of examples is taken to be the number of directory entries in
    ``folder_name``; example ``i`` is read from ``<folder_name>/example_<i>``.
    Inside an example folder the number of ``.png`` files determines how many
    ``image_<j>.png`` / ``data_<j>.json`` pairs are read (``j`` from 0).

    Args:
        folder_name: path of the dataset root (no trailing separator needed).

    Returns:
        A list with one ``(img, new_data)`` tuple per image, where
        ``new_data`` maps a landmark index (position of the JSON key in
        ``get_all_landmark_names()``) to the value stored in the JSON file.

    Raises:
        ValueError: if a JSON key is not a known landmark name.
    """
    start = time.time()
    landmark_names = get_all_landmark_names()
    num_examples = len(os.listdir(folder_name))

    dataset = []
    for i in range(num_examples):
        example_folder_name = folder_name + "/example_" + str(i)

        # One action per PNG present in the example folder.
        num_actions = sum(
            1 for entry in os.listdir(example_folder_name)
            if entry.endswith('.png')
        )

        for j in range(num_actions):
            # Two axis swaps move the last axis to the front
            # (assumes imread yields height x width x channel -- TODO confirm).
            img = scipy.misc.imread(
                example_folder_name + "/image_" + str(j) + ".png"
            ).swapaxes(1, 2).swapaxes(0, 1)

            with open(example_folder_name + "/data_" + str(j) + ".json",
                      'r') as fp:
                data = json.load(fp)

            new_data = {
                landmark_names.index(key): value
                for key, value in data.items()
            }
            dataset.append((img, new_data))

    end = time.time()
    # The original format string used "% seconds", which is parsed as a
    # space-flagged "%s" conversion and printed e.g. " 1.5econds".
    logging.info("Parsed dataset of size %r in time %r seconds",
                 len(dataset), (end - start))
    return dataset
def __init__(self, config, constants):
    """Assemble the resnet-based incremental model with a goal image.

    Args:
        config: mapping read for "num_actions", "image_height",
            "image_width" and the "do_action_prediction",
            "do_temporal_autoencoding" and "do_object_detection" switches.
        constants: mapping read for "image_emb_dim" and "action_emb_dim".
    """
    AbstractIncrementalModel.__init__(self, config, constants)
    num_actions = config["num_actions"]
    self.none_action = num_actions

    image_emb_dim = constants["image_emb_dim"]
    image_module = ImageResnetModule(
        image_emb_size=image_emb_dim,
        input_num_channels=3,
        image_height=config["image_height"],
        image_width=config["image_width"],
        using_recurrence=True,
    )
    self.image_module = image_module

    self.image_recurrence_module = IncrementalRecurrenceSimpleModule(
        input_emb_dim=image_emb_dim, output_emb_dim=image_emb_dim)

    action_emb_dim = constants["action_emb_dim"]
    self.action_module = ActionSimpleModule(
        num_actions=num_actions, action_emb_size=action_emb_dim)

    total_emb_size = 2 * image_emb_dim + action_emb_dim

    # Optional auxiliary heads; each stays None when its switch is off.
    self.action_prediction_module = (
        ActionPredictionModule(2 * image_emb_dim, image_emb_dim, num_actions)
        if config["do_action_prediction"] else None
    )
    self.temporal_autoencoder_module = (
        TemporalAutoencoderModule(
            self.action_module, image_emb_dim, action_emb_dim, image_emb_dim)
        if config["do_temporal_autoencoding"] else None
    )
    if not config["do_object_detection"]:
        self.object_detection_module = None
    else:
        self.landmark_names = get_all_landmark_names()
        self.object_detection_module = ObjectDetectionModule(
            image_module=self.image_module,
            image_emb_size=image_emb_dim,
            num_objects=63,
        )

    self.final_module = IncrementalMultimodalRecurrentSimpleGoalImageModule(
        image_module=self.image_module,
        image_recurrence_module=self.image_recurrence_module,
        action_module=self.action_module,
        total_emb_size=total_emb_size,
        num_actions=num_actions,
    )

    if torch.cuda.is_available():
        candidates = (
            self.image_module,
            self.image_recurrence_module,
            self.action_module,
            self.final_module,
            self.action_prediction_module,
            self.temporal_autoencoder_module,
            self.object_detection_module,
        )
        for module in candidates:
            if module is not None:
                module.cuda()
def __init__(self, config, constants):
    """Create the pointer text encoder and the segmentation head.

    Args:
        config: mapping read for "num_actions" and "vocab_size".
        constants: mapping read for "word_emb_dim" and "lstm_emb_dim".
    """
    self.none_action = config["num_actions"]
    self.landmark_names = get_all_landmark_names()

    lstm_emb_dim = constants["lstm_emb_dim"]
    text_module = TextPointerModule(
        emb_dim=constants["word_emb_dim"],
        hidden_dim=lstm_emb_dim,
        vocab_size=config["vocab_size"],
    )
    self.text_module = text_module

    # The head is sized for four LSTM-width vectors of text features.
    self.final_module = SegmentationFinalModule(
        text_module=self.text_module,
        text_emb_size=4 * lstm_emb_dim,
    )

    if torch.cuda.is_available():
        for module in (self.text_module, self.final_module):
            module.cuda()
def __init__(self, config, constants):
    """Assemble the Unet image encoder, text encoder and contextual head.

    Args:
        config: mapping read for "num_actions", "image_height",
            "image_width", "vocab_size" and the "use_pointer_model",
            "do_object_detection" and "do_goal_prediction" switches.
        constants: mapping read for "image_emb_dim".

    Raises:
        NotImplementedError: if ``config["use_pointer_model"]`` is truthy.
    """
    AbstractIncrementalModel.__init__(self, config, constants)
    self.none_action = config["num_actions"]

    self.image_module = UnetImageModule(
        image_emb_size=constants["image_emb_dim"],
        input_num_channels=3,
        image_height=config["image_height"],
        image_width=config["image_width"],
        using_recurrence=True,
    )

    # Hard-coded geometry of the feature map handed to the later modules.
    num_channels = 64
    image_height = image_width = 32
    self.num_cameras = 1

    if config["use_pointer_model"]:
        raise NotImplementedError()
    self.text_module = ChaplotTextModule(
        emb_dim=32,
        hidden_dim=256,
        vocab_size=config["vocab_size"],
        image_height=image_height,
        image_width=image_width,
    )

    if not config["do_object_detection"]:
        self.object_detection_module = None
    else:
        self.landmark_names = get_all_landmark_names()
        self.object_detection_module = PixelIdentificationModule(
            num_channels=num_channels, num_objects=67)

    # Goal prediction is disabled whichever way the switch is set; the
    # switch is still looked up, as before.
    if config["do_goal_prediction"]:
        # Formerly GoalPredictionModule(total_emb_size=32).
        self.goal_prediction_module = None
    else:
        self.goal_prediction_module = None

    self.final_module = Unet5Contextual(
        in_channels=num_channels, out_channels=1, embedding_size=256)

    if torch.cuda.is_available():
        candidates = (
            self.image_module,
            self.text_module,
            self.final_module,
            self.object_detection_module,
            self.goal_prediction_module,
        )
        for module in candidates:
            if module is not None:
                module.cuda()
def __init__(self, config, constants):
    """Assemble the recurrent model driven by symbolic instructions.

    Args:
        config: mapping read for "num_actions", "image_height" and
            "image_width".
        constants: mapping read for "image_emb_dim" and "action_emb_dim".
    """
    AbstractModel.__init__(self, config, constants)
    self.none_action = config["num_actions"]

    # Embedding tables shared with the symbolic instruction encoder.
    self.radius_module = RadiusModule(15)
    self.angle_module = AngleModule(48)
    self.landmark_module = LandmarkModule(63)

    self.image_module = ImageResnetModule(
        image_emb_size=constants["image_emb_dim"],
        input_num_channels=3,
        image_height=config["image_height"],
        image_width=config["image_width"],
        using_recurrence=True)
    self.image_recurrence_module = RecurrenceSimpleModule(
        input_emb_dim=constants["image_emb_dim"],
        output_emb_dim=constants["image_emb_dim"])
    self.text_module = SymbolicInstructionModule(
        radius_embedding=self.radius_module,
        theta_embedding=self.angle_module,
        landmark_embedding=self.landmark_module)
    self.action_module = ActionSimpleModule(
        num_actions=config["num_actions"],
        action_emb_size=constants["action_emb_dim"])

    # image embedding + 32 * 4 for the instruction + action embedding
    # (presumably four 32-wide symbolic embeddings -- TODO confirm).
    total_emb_size = (constants["image_emb_dim"] + 32 * 4 +
                      constants["action_emb_dim"])
    self.final_module = MultimodalRecurrentSimpleModule(
        image_module=self.image_module,
        image_recurrence_module=self.image_recurrence_module,
        text_module=self.text_module,
        action_module=self.action_module,
        total_emb_size=total_emb_size,
        num_actions=config["num_actions"])

    if torch.cuda.is_available():
        self.image_module.cuda()
        # Moved explicitly, in line with the other model constructors,
        # rather than relying on final_module to carry it along.
        self.image_recurrence_module.cuda()
        self.text_module.cuda()
        self.action_module.cuda()
        self.final_module.cuda()
        self.radius_module.cuda()
        self.angle_module.cuda()
        self.landmark_module.cuda()
def __init__(self, config, constants):
    """Assemble the symbolic-image model with a learned text encoder.

    Args:
        config: mapping read for "num_actions", "vocab_size" and the
            "use_pointer_model" switch.
        constants: mapping read for "word_emb_dim", "lstm_emb_dim" and
            "action_emb_dim".
    """
    AbstractModel.__init__(self, config, constants)
    num_actions = config["num_actions"]
    self.none_action = num_actions
    landmark_names = get_all_landmark_names()

    self.radius_module = RadiusModule(15)
    self.angle_module = AngleModule(48)
    self.landmark_module = LandmarkModule(63)
    self.image_module = SymbolicImageModule(
        landmark_names=landmark_names,
        radius_module=self.radius_module,
        angle_module=self.angle_module,
        landmark_module=self.landmark_module,
    )

    # Both text encoders take the same arguments; only the class differs.
    text_encoder_cls = (
        TextPointerModule if config["use_pointer_model"] else TextSimpleModule
    )
    self.text_module = text_encoder_cls(
        emb_dim=constants["word_emb_dim"],
        hidden_dim=constants["lstm_emb_dim"],
        vocab_size=config["vocab_size"],
    )

    self.action_module = ActionSimpleModule(
        num_actions=num_actions,
        action_emb_size=constants["action_emb_dim"],
    )

    total_emb_size = (
        32 * 3 * 63
        + constants["lstm_emb_dim"]
        + constants["action_emb_dim"]
    )
    self.final_module = MultimodalSimpleModule(
        image_module=self.image_module,
        text_module=self.text_module,
        action_module=self.action_module,
        total_emb_size=total_emb_size,
        num_actions=num_actions,
    )

    if torch.cuda.is_available():
        for module in (
            self.image_module,
            self.text_module,
            self.action_module,
            self.final_module,
            self.radius_module,
            self.angle_module,
            self.landmark_module,
        ):
            module.cuda()
def __init__(self, config, constants, image_module_path):
    """Build the detection model on top of a pretrained resnet18.

    Args:
        config: mapping read for "num_actions".
        constants: not read by this constructor.
        image_module_path: path of a saved state dict for the image module,
            or ``None`` to keep the pretrained weights as downloaded.
    """
    self.none_action = config["num_actions"]
    self.landmark_names = get_all_landmark_names()
    self.image_module = resnet.resnet18(pretrained=True)

    use_cuda = torch.cuda.is_available()
    if image_module_path is not None:
        if use_cuda:
            state = torch.load(image_module_path)
        else:
            # No GPU: keep every stored tensor on the CPU while loading.
            state = torch.load(
                image_module_path,
                map_location=lambda storage, location: storage,
            )
        self.image_module.load_state_dict(state)

    # Feature width handed to the detection head.
    total_emb_size = 1000
    self.final_module = ImageDetectionModule(
        image_module=self.image_module,
        image_emb_size=total_emb_size,
    )

    if torch.cuda.is_available():
        for module in (self.image_module, self.final_module):
            module.cuda()
def __init__(self, config, constants):
    """Assemble the fully symbolic model (symbolic image and instruction).

    Args:
        config: mapping read for "num_actions".
        constants: mapping read for "action_emb_dim".
    """
    AbstractModel.__init__(self, config, constants)
    num_actions = config["num_actions"]
    self.none_action = num_actions
    landmark_names = get_all_landmark_names()

    # The same three embedding tables feed both the image and text encoders.
    radius_module = RadiusModule(15)
    angle_module = AngleModule(48)
    landmark_module = LandmarkModule(63)
    self.radius_module = radius_module
    self.angle_module = angle_module
    self.landmark_module = landmark_module

    self.image_module = SymbolicImageModule(
        landmark_names=landmark_names,
        radius_module=radius_module,
        angle_module=angle_module,
        landmark_module=landmark_module,
    )
    self.text_module = SymbolicInstructionModule(
        radius_embedding=radius_module,
        theta_embedding=angle_module,
        landmark_embedding=landmark_module,
    )

    action_emb_dim = constants["action_emb_dim"]
    self.action_module = ActionSimpleModule(
        num_actions=num_actions, action_emb_size=action_emb_dim)

    total_emb_size = 32 * 3 * 63 + 32 * 4 + action_emb_dim
    self.final_module = MultimodalSimpleModule(
        image_module=self.image_module,
        text_module=self.text_module,
        action_module=self.action_module,
        total_emb_size=total_emb_size,
        num_actions=num_actions,
    )

    if torch.cuda.is_available():
        for module in (
            self.image_module,
            self.text_module,
            self.action_module,
            self.final_module,
            self.radius_module,
            self.angle_module,
            self.landmark_module,
        ):
            module.cuda()
def __init__(self, config, constants, final_model_type="unet",
             final_dimension=None):
    """Assemble the Unet-based incremental attention model.

    Args:
        config: mapping read for "num_actions", "image_height",
            "image_width", "vocab_size" and the "use_pointer_model",
            "do_action_prediction", "do_temporal_autoencoding",
            "do_object_detection", "do_symbolic_language_prediction" and
            "do_goal_prediction" switches.
        constants: mapping read for "image_emb_dim" and, when the matching
            switch is on, "action_emb_dim" / "lstm_emb_dim".
        final_model_type: one of "m4jksum1", "unet",
            "unet-positional-encoding" or "andrew".
        final_dimension: forwarded to ``UnetImageModule``.

    Raises:
        NotImplementedError: if ``config["use_pointer_model"]`` is truthy.
        AssertionError: if ``final_model_type`` is not a known name.
    """
    AbstractIncrementalModel.__init__(self, config, constants)
    self.none_action = config["num_actions"]

    self.image_module = UnetImageModule(
        image_emb_size=constants["image_emb_dim"],
        input_num_channels=3,
        image_height=config["image_height"],
        image_width=config["image_width"],
        using_recurrence=True,
        final_dimension=final_dimension)
    num_channels, image_height, image_width = \
        self.image_module.get_final_dimension()
    self.num_cameras = 1

    self.image_recurrence_module = IncrementalRecurrenceChaplotModule(
        input_emb_dim=256, output_emb_dim=256)

    if config["use_pointer_model"]:
        raise NotImplementedError()
    self.text_module = ChaplotTextModule(
        emb_dim=32,
        hidden_dim=256,
        vocab_size=config["vocab_size"],
        image_height=image_height,
        image_width=image_width)

    if config["do_action_prediction"]:
        self.action_prediction_module = ActionPredictionModule(
            2 * self.num_cameras * constants["image_emb_dim"],
            constants["image_emb_dim"], config["num_actions"])
    else:
        self.action_prediction_module = None

    if config["do_temporal_autoencoding"]:
        # NOTE(review): self.action_module is never assigned in this
        # constructor; unless the base class provides it, enabling this
        # switch fails with AttributeError -- confirm.
        self.temporal_autoencoder_module = TemporalAutoencoderModule(
            self.action_module,
            self.num_cameras * constants["image_emb_dim"],
            constants["action_emb_dim"], constants["image_emb_dim"])
    else:
        self.temporal_autoencoder_module = None

    if config["do_object_detection"]:
        self.landmark_names = get_all_landmark_names()
        self.object_detection_module = PixelIdentificationModule(
            num_channels=num_channels, num_objects=67)
    else:
        self.object_detection_module = None

    if config["do_symbolic_language_prediction"]:
        self.symbolic_language_prediction_module = \
            SymbolicLanguagePredictionModule(
                total_emb_size=2 * constants["lstm_emb_dim"])
    else:
        self.symbolic_language_prediction_module = None

    # Goal prediction is disabled whichever way the switch is set
    # (formerly GoalPredictionModule(total_emb_size=32)); the switch is
    # still looked up, as before.
    if config["do_goal_prediction"]:
        self.goal_prediction_module = None
    else:
        self.goal_prediction_module = None

    # Keyword arguments shared by every final-module variant.
    shared_kwargs = dict(
        image_module=self.image_module,
        image_recurrence_module=self.image_recurrence_module,
        text_module=self.text_module,
        max_episode_length=150,
        final_image_height=image_height,
        final_image_width=image_width)
    # Extra keyword arguments taken only by the two Unet variants.
    unet_kwargs = dict(
        in_channels=num_channels, out_channels=1, embedding_size=256)

    if final_model_type == "m4jksum1":
        self.final_module = \
            IncrementalMultimodalAttentionChaplotModuleM4JKSUM1(
                **shared_kwargs)
    elif final_model_type == "unet":
        self.final_module = IncrementalUnetAttentionModuleJustProb(
            **dict(shared_kwargs, **unet_kwargs))
    elif final_model_type == "unet-positional-encoding":
        self.final_module = \
            IncrementalUnetAttentionModuleJustProbSpatialEncoding(
                **dict(shared_kwargs, **unet_kwargs))
    elif final_model_type == "andrew":
        self.final_module = \
            IncrementalMultimodalAttentionChaplotModuleM5AndrewV2(
                normalize_filters=False, **shared_kwargs)
    else:
        # A single formatted message; passing two arguments made the
        # exception print as a tuple.
        raise AssertionError(
            "Unknown final model type %r" % (final_model_type,))

    if torch.cuda.is_available():
        self.image_module.cuda()
        self.image_recurrence_module.cuda()
        self.text_module.cuda()
        self.final_module.cuda()
        if self.action_prediction_module is not None:
            self.action_prediction_module.cuda()
        if self.temporal_autoencoder_module is not None:
            self.temporal_autoencoder_module.cuda()
        if self.object_detection_module is not None:
            self.object_detection_module.cuda()
        if self.symbolic_language_prediction_module is not None:
            self.symbolic_language_prediction_module.cuda()
        if self.goal_prediction_module is not None:
            self.goal_prediction_module.cuda()
def __init__(self, config, constants):
    """Assemble the Chaplot-style incremental model.

    Args:
        config: mapping read for "num_actions", "image_height",
            "image_width", "vocab_size" and the "use_pointer_model",
            "do_action_prediction", "do_temporal_autoencoding",
            "do_object_detection", "do_symbolic_language_prediction" and
            "do_goal_prediction" switches.
        constants: mapping read for "image_emb_dim", "horizon",
            "max_extra_horizon" and, when the matching switch is on,
            "action_emb_dim" / "lstm_emb_dim".

    Raises:
        NotImplementedError: if ``config["use_pointer_model"]`` is truthy.
    """
    AbstractIncrementalModel.__init__(self, config, constants)
    num_actions = config["num_actions"]
    self.none_action = num_actions

    image_emb_dim = constants["image_emb_dim"]
    self.image_module = ChaplotImageModule(
        image_emb_size=image_emb_dim,
        input_num_channels=3,  # TODO this value keeps changing.
        image_height=config["image_height"],
        image_width=config["image_width"],
        using_recurrence=True,
    )

    num_cameras = 1
    self.num_cameras = num_cameras
    self.image_recurrence_module = IncrementalRecurrenceChaplotModule(
        input_emb_dim=256, output_emb_dim=256)

    # TODO the final feature-map size is shaky and keeps changing; it is
    # used by both the text module and the final module below.
    final_image_height = final_image_width = 3

    if config["use_pointer_model"]:
        raise NotImplementedError()
    self.text_module = ChaplotTextModule(
        emb_dim=32,
        hidden_dim=256,
        vocab_size=config["vocab_size"],
        image_height=final_image_height,
        image_width=final_image_width,
    )

    self.action_prediction_module = (
        ActionPredictionModule(
            2 * num_cameras * image_emb_dim, image_emb_dim, num_actions)
        if config["do_action_prediction"] else None
    )

    if config["do_temporal_autoencoding"]:
        # NOTE(review): self.action_module is never assigned in this
        # constructor; unless the base class provides it, enabling this
        # switch fails with AttributeError -- confirm.
        self.temporal_autoencoder_module = TemporalAutoencoderModule(
            self.action_module,
            num_cameras * image_emb_dim,
            constants["action_emb_dim"],
            image_emb_dim,
        )
    else:
        self.temporal_autoencoder_module = None

    if not config["do_object_detection"]:
        self.object_detection_module = None
    else:
        self.landmark_names = get_all_landmark_names()
        self.object_detection_module = ObjectDetectionModule(
            image_module=self.image_module,
            image_emb_size=num_cameras * image_emb_dim,
            num_objects=67,
        )

    self.symbolic_language_prediction_module = (
        SymbolicLanguagePredictionModule(
            total_emb_size=2 * constants["lstm_emb_dim"])
        if config["do_symbolic_language_prediction"] else None
    )
    self.goal_prediction_module = (
        GoalPredictionModule(total_emb_size=32)
        if config["do_goal_prediction"] else None
    )

    self.final_module = IncrementalMultimodalChaplotModule(
        image_module=self.image_module,
        image_recurrence_module=self.image_recurrence_module,
        text_module=self.text_module,
        max_episode_length=(constants["horizon"] +
                            constants["max_extra_horizon"]),
        final_image_height=final_image_height,
        final_image_width=final_image_width,
    )

    if torch.cuda.is_available():
        candidates = (
            self.image_module,
            self.image_recurrence_module,
            self.text_module,
            self.final_module,
            self.action_prediction_module,
            self.temporal_autoencoder_module,
            self.object_detection_module,
            self.symbolic_language_prediction_module,
            self.goal_prediction_module,
        )
        for module in candidates:
            if module is not None:
                module.cuda()
def __init__(self, config, constants):
    """Assemble the dense recurrent incremental model (resnet + text).

    Args:
        config: mapping read for "num_actions", "image_height",
            "image_width", "vocab_size" and the "use_pointer_model",
            "do_action_prediction", "do_temporal_autoencoding",
            "do_object_detection", "do_symbolic_language_prediction" and
            "do_goal_prediction" switches.
        constants: mapping read for "image_emb_dim", "action_emb_dim",
            "word_emb_dim" and "lstm_emb_dim".
    """
    AbstractIncrementalModel.__init__(self, config, constants)
    num_actions = config["num_actions"]
    self.none_action = num_actions

    image_emb_dim = constants["image_emb_dim"]
    self.image_module = ImageResnetModule(
        image_emb_size=image_emb_dim,
        input_num_channels=3,
        image_height=config["image_height"],
        image_width=config["image_width"],
        using_recurrence=True,
    )

    num_cameras = 1
    self.num_cameras = num_cameras
    action_emb_dim = constants["action_emb_dim"]
    # The recurrence consumes the image embedding(s) plus an action embedding.
    self.image_recurrence_module = IncrementalRecurrenceSimpleModule(
        input_emb_dim=image_emb_dim * num_cameras + action_emb_dim,
        output_emb_dim=image_emb_dim,
    )

    use_pointer = config["use_pointer_model"]
    text_encoder_cls = TextPointerModule if use_pointer else TextBiLSTMModule
    lstm_emb_dim = constants["lstm_emb_dim"]
    self.text_module = text_encoder_cls(
        emb_dim=constants["word_emb_dim"],
        hidden_dim=lstm_emb_dim,
        vocab_size=config["vocab_size"],
    )

    self.action_module = ActionSimpleModule(
        num_actions=num_actions, action_emb_size=action_emb_dim)

    if config["use_pointer_model"]:
        total_emb_size = image_emb_dim + 4 * lstm_emb_dim + action_emb_dim
    else:
        total_emb_size = ((num_cameras + 1) * image_emb_dim
                          + 2 * lstm_emb_dim + action_emb_dim)

    # Optional auxiliary heads; each stays None when its switch is off.
    self.action_prediction_module = (
        ActionPredictionModule(
            2 * num_cameras * image_emb_dim, image_emb_dim, num_actions)
        if config["do_action_prediction"] else None
    )
    self.temporal_autoencoder_module = (
        TemporalAutoencoderModule(
            self.action_module,
            num_cameras * image_emb_dim,
            action_emb_dim,
            image_emb_dim,
        )
        if config["do_temporal_autoencoding"] else None
    )
    if not config["do_object_detection"]:
        self.object_detection_module = None
    else:
        self.landmark_names = get_all_landmark_names()
        self.object_detection_module = ObjectDetectionModule(
            image_module=self.image_module,
            image_emb_size=num_cameras * image_emb_dim,
            num_objects=67,
        )
    self.symbolic_language_prediction_module = (
        SymbolicLanguagePredictionModule(total_emb_size=2 * lstm_emb_dim)
        if config["do_symbolic_language_prediction"] else None
    )
    self.goal_prediction_module = (
        GoalPredictionModule(total_emb_size=32)
        if config["do_goal_prediction"] else None
    )

    self.final_module = TmpIncrementalMultimodalDenseValtsRecurrentSimpleModule(
        image_module=self.image_module,
        image_recurrence_module=self.image_recurrence_module,
        text_module=self.text_module,
        action_module=self.action_module,
        total_emb_size=total_emb_size,
        num_actions=num_actions,
    )

    if torch.cuda.is_available():
        candidates = (
            self.image_module,
            self.image_recurrence_module,
            self.text_module,
            self.action_module,
            self.final_module,
            self.action_prediction_module,
            self.temporal_autoencoder_module,
            self.object_detection_module,
            self.symbolic_language_prediction_module,
            self.goal_prediction_module,
        )
        for module in candidates:
            if module is not None:
                module.cuda()
def __init__(self, config, constants, use_image=False):
    """Assemble the oracle-gold incremental model.

    Args:
        config: mapping read for "num_actions", "image_height",
            "image_width", "vocab_size" and the "use_pointer_model",
            "do_action_prediction", "do_temporal_autoencoding",
            "do_object_detection", "do_symbolic_language_prediction" and
            "do_goal_prediction" switches.
        constants: mapping read for "image_emb_dim" and, when the matching
            switch is on, "action_emb_dim" / "lstm_emb_dim".
        use_image: when truthy the final module is ``OracleGoldWithImage``
            (which also receives the image module), else ``OracleGold``.

    Raises:
        NotImplementedError: if ``config["use_pointer_model"]`` is truthy.
    """
    AbstractIncrementalModel.__init__(self, config, constants)
    num_actions = config["num_actions"]
    self.none_action = num_actions

    image_emb_dim = constants["image_emb_dim"]
    self.image_module = UnetImageModule(
        image_emb_size=image_emb_dim,
        input_num_channels=3,
        image_height=config["image_height"],
        image_width=config["image_width"],
        using_recurrence=True,
    )
    final_dims = self.image_module.get_final_dimension()
    num_channels, image_height, image_width = final_dims

    num_cameras = 1
    self.num_cameras = num_cameras
    self.image_recurrence_module = IncrementalRecurrenceChaplotModule(
        input_emb_dim=256, output_emb_dim=256)

    if config["use_pointer_model"]:
        raise NotImplementedError()
    self.text_module = ChaplotTextModule(
        emb_dim=32,
        hidden_dim=256,
        vocab_size=config["vocab_size"],
        image_height=image_height,
        image_width=image_width,
    )

    self.action_prediction_module = (
        ActionPredictionModule(
            2 * num_cameras * image_emb_dim, image_emb_dim, num_actions)
        if config["do_action_prediction"] else None
    )

    if config["do_temporal_autoencoding"]:
        # NOTE(review): self.action_module is never assigned in this
        # constructor; unless the base class provides it, enabling this
        # switch fails with AttributeError -- confirm.
        self.temporal_autoencoder_module = TemporalAutoencoderModule(
            self.action_module,
            num_cameras * image_emb_dim,
            constants["action_emb_dim"],
            image_emb_dim,
        )
    else:
        self.temporal_autoencoder_module = None

    if not config["do_object_detection"]:
        self.object_detection_module = None
    else:
        self.landmark_names = get_all_landmark_names()
        self.object_detection_module = PixelIdentificationModule(
            num_channels=num_channels, num_objects=67)

    self.symbolic_language_prediction_module = (
        SymbolicLanguagePredictionModule(
            total_emb_size=2 * constants["lstm_emb_dim"])
        if config["do_symbolic_language_prediction"] else None
    )

    # Goal prediction is disabled whichever way the switch is set
    # (formerly GoalPredictionModule(total_emb_size=32)); the switch is
    # still looked up, as before.
    if config["do_goal_prediction"]:
        self.goal_prediction_module = None
    else:
        self.goal_prediction_module = None

    if not use_image:
        self.final_module = OracleGold(
            image_recurrence_module=self.image_recurrence_module,
            max_episode_length=150,
            final_image_height=image_height,
            final_image_width=image_width,
        )
    else:
        self.final_module = OracleGoldWithImage(
            image_recurrence_module=self.image_recurrence_module,
            image_module=self.image_module,
            max_episode_length=150,
            final_image_height=image_height,
            final_image_width=image_width,
        )

    if torch.cuda.is_available():
        candidates = (
            self.image_module,
            self.image_recurrence_module,
            self.text_module,
            self.final_module,
            self.action_prediction_module,
            self.temporal_autoencoder_module,
            self.object_detection_module,
            self.symbolic_language_prediction_module,
            self.goal_prediction_module,
        )
        for module in candidates:
            if module is not None:
                module.cuda()
import torch
import torch.optim as optim
import collections
import utils.generic_policy as gp
import utils.debug_nav_drone_instruction as debug
import utils.nav_drone_symbolic_instructions as nav_drone_symbolic_instructions
from agents.agent_observed_state import AgentObservedState
from agents.symbolic_text_replay_memory_item import SymbolicTextReplayMemoryItem
from abstract_learning import AbstractLearning
from utils.cuda import cuda_var
# For error analysis/printing
from dataset_agreement_nav_drone.nav_drone_dataset_parser import make_vocab_map
from utils.nav_drone_landmarks import get_all_landmark_names

LANDMARK_NAMES = get_all_landmark_names()
NO_BUCKETS = 48
BUCKET_WIDTH = 7.5


class ParagraphSegmentationTrainTest(AbstractLearning):
    """Learner that trains a model to predict the symbolic form of text."""

    def __init__(self, model, action_space, meta_data_util, config, constants,
                 tensorboard):
        """Store the collaborators and settings used by the learner.

        Args:
            model: kept as ``self.model``.
            action_space: kept as ``self.action_space``.
            meta_data_util: kept as ``self.meta_data_util``.
            config: kept as ``self.config``.
            constants: kept as ``self.constants``; "max_epochs" is read from
                it and kept as ``self.max_epoch``.
            tensorboard: accepted but not stored by this constructor.
        """
        self.max_epoch = constants["max_epochs"]

        # Settings.
        self.config = config
        self.constants = constants

        # Collaborators.
        self.model = model
        self.action_space = action_space
        self.meta_data_util = meta_data_util
def __init__(self, config, constants):
    """Assemble the incremental resnet model driven by symbolic instructions.

    Args:
        config: mapping read for "num_actions", "image_height",
            "image_width" and the "do_action_prediction",
            "do_temporal_autoencoding" and "do_object_detection" switches.
        constants: mapping read for "image_emb_dim" and "action_emb_dim".
    """
    AbstractIncrementalModel.__init__(self, config, constants)
    num_actions = config["num_actions"]
    self.none_action = num_actions
    # The result is not used below; the call is retained as in the original.
    landmark_names = get_all_landmark_names()

    # Embedding tables for the symbolic instruction encoder.
    self.radius_module = RadiusModule(15)
    self.angle_module = AngleModule(12)  # (48)
    self.landmark_module = LandmarkModule(67)

    num_cameras = 1
    self.num_cameras = num_cameras

    image_emb_dim = constants["image_emb_dim"]
    self.image_module = ImageRyanResnetModule(
        image_emb_size=image_emb_dim,
        input_num_channels=3,
        image_height=config["image_height"],
        image_width=config["image_width"],
        using_recurrence=True,
    )
    # The action embedding is not part of the recurrence input here.
    self.image_recurrence_module = IncrementalRecurrenceSimpleModule(
        input_emb_dim=image_emb_dim * num_cameras,
        output_emb_dim=image_emb_dim,
    )
    self.text_module = SymbolicInstructionModule(
        radius_embedding=self.radius_module,
        theta_embedding=self.angle_module,
        landmark_embedding=self.landmark_module,
    )

    action_emb_dim = constants["action_emb_dim"]
    self.action_module = ActionSimpleModule(
        num_actions=num_actions, action_emb_size=action_emb_dim)

    total_emb_size = num_cameras * image_emb_dim + 32 * 2 + action_emb_dim

    # Optional auxiliary heads; each stays None when its switch is off.
    self.action_prediction_module = (
        ActionPredictionModule(
            2 * num_cameras * image_emb_dim, image_emb_dim, num_actions)
        if config["do_action_prediction"] else None
    )
    self.temporal_autoencoder_module = (
        TemporalAutoencoderModule(
            self.action_module,
            num_cameras * image_emb_dim,
            action_emb_dim,
            image_emb_dim,
        )
        if config["do_temporal_autoencoding"] else None
    )
    if not config["do_object_detection"]:
        self.object_detection_module = None
    else:
        self.landmark_names = get_all_landmark_names()
        self.object_detection_module = ObjectDetectionModule(
            image_module=self.image_module,
            image_emb_size=num_cameras * image_emb_dim,
            num_objects=67,
        )

    self.final_module = IncrementalMultimodalRecurrentSimpleModule(
        image_module=self.image_module,
        image_recurrence_module=self.image_recurrence_module,
        text_module=self.text_module,
        action_module=self.action_module,
        total_emb_size=total_emb_size,
        num_actions=num_actions,
    )

    if torch.cuda.is_available():
        candidates = (
            self.image_module,
            self.image_recurrence_module,
            self.text_module,
            self.action_module,
            self.final_module,
            self.action_prediction_module,
            self.temporal_autoencoder_module,
            self.object_detection_module,
        )
        for module in candidates:
            if module is not None:
                module.cuda()