def write_to_file():
    # Drain (search_tuple, urls) pairs off the shared queue and write the
    # de-duplicated video IDs to timestamped train/val CSV files.
    out_file_train = open(
        "youtube_scrape/urls_" + misc_util.get_time_str() + "_relevance_train.csv", "w")
    out_file_val = open(
        "youtube_scrape/urls_" + misc_util.get_time_str() + "_relevance_val.csv", "w")
    vid_ids = {}
    num_add_attempt = 0
    while True:
        search_tuple, urls = queue.get()
        if urls is None:
            # Sentinel from the producer: no more results are coming.
            break
        if len(urls) == 0:
            continue
        for url in urls:
            num_add_attempt += 1
            if url not in vid_ids:
                vid_ids[url] = []
            vid_ids[url].append(": ".join(search_tuple))
    print("num total", num_add_attempt, "num final", len(vid_ids))

    # Sort before shuffling so the split is reproducible for a fixed random seed.
    sorted_vid_ids = sorted(vid_ids.keys())
    random.shuffle(sorted_vid_ids)
    lines_val = sorted_vid_ids[:65536]
    lines_train = sorted_vid_ids[65536:]
    lines_val.sort()
    lines_train.sort()

    # Each CSV row is the video ID followed by every search query that returned it.
    lines_train = ['"' + key + '", "' + '", "'.join(vid_ids[key]) + '"' for key in lines_train]
    lines_val = ['"' + key + '", "' + '", "'.join(vid_ids[key]) + '"' for key in lines_val]
    out_file_train.write("\n".join(lines_train) + "\n")
    out_file_train.close()
    out_file_val.write("\n".join(lines_val) + "\n")
    out_file_val.close()
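
# Usage sketch (illustrative, not from the original file): a scraper thread
# feeds write_to_file through the shared `queue`, ending with a None sentinel
# in the urls slot:
#   queue.put((("query", "category"), ["video_id_1", "video_id_2"]))
#   queue.put(((), None))  # tells write_to_file to stop and write the CSVs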
def main():
    args = parser.parse_args()
    args.tensorboard = not args.no_tensorboard
    args.load_model = not args.clear_weights
    args.save_checkpoints = not args.no_save_checkpoints
    if args.seed is not None:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        cudnn.deterministic = True
        warnings.warn(
            "You have chosen to seed training. "
            "This will turn on the CUDNN deterministic setting, "
            "which can slow down your training considerably! "
            "You may see unexpected behavior when restarting "
            "from checkpoints."
        )
    log_prefix = args.log_prefix
    time_str = misc_util.get_time_str()
    checkpoint_dir = os.path.join(log_prefix, args.checkpoint_dirname, time_str)
    torch_devices = [int(gpu_id.strip()) for gpu_id in args.pytorch_gpu_ids.split(",")]
    args.gpu = torch_devices[0]
    device = "cuda:" + str(torch_devices[0])

    model = ImagenetModel()
    model = pt_util.get_data_parallel(model, torch_devices)
    model.to(device)

    start_iter = 0
    if args.load_model:
        start_iter = pt_util.restore_from_folder(model, os.path.join(log_prefix, args.checkpoint_dirname, "*"))
    args.start_epoch = start_iter

    train_logger = None
    test_logger = None
    if args.tensorboard:
        train_logger = tensorboard_logger.Logger(
            os.path.join(log_prefix, args.tensorboard_dirname, time_str + "_train")
        )
        test_logger = tensorboard_logger.Logger(
            os.path.join(log_prefix, args.tensorboard_dirname, time_str + "_test")
        )

    main_worker(model, args.gpu, args, train_logger, test_logger, checkpoint_dir)
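
# Entry point (assumed; the original module presumably invokes main() when
# run as a script):
if __name__ == "__main__":
    main()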
def setup(self, create_decoder):
    self.setup_device()
    render_gpus = [int(gpu_id.strip()) for gpu_id in self.shell_args.render_gpu_ids.split(",")]
    self.configs = []
    self.env_types = []
    for proc in range(self.shell_args.num_processes):
        extra_task_sensors = set()
        extra_agent_sensors = set()
        if self.shell_args.record_video or self.shell_args.update_encoder_features:
            extra_agent_sensors.add("DEPTH_SENSOR")
        if "SEMANTIC_SENSOR" in extra_agent_sensors:
            extra_task_sensors.add("CLASS_SEGMENTATION_SENSOR")
        if self.shell_args.dataset == "suncg":
            data_path = "data/datasets/pointnav/suncg/v1/{split}/{split}.json.gz"
        elif self.shell_args.dataset == "mp3d":
            data_path = "data/datasets/pointnav/mp3d/v1/{split}/{split}.json.gz"
        elif self.shell_args.dataset == "gibson":
            data_path = "data/datasets/pointnav/gibson/v1/{split}/{split}.json.gz"
        else:
            raise NotImplementedError("No rule for this dataset.")
        config = get_dataset_config(
            data_path,
            self.shell_args.data_subset,
            self.shell_args.max_episode_length,
            render_gpus[proc % len(render_gpus)],
            list(extra_task_sensors),
            list(extra_agent_sensors),
        )
        config.TASK.NUM_EPISODES_BEFORE_JUMP = self.shell_args.num_processes
        if self.shell_args.blind and not self.shell_args.record_video:
            # Blind agents never read the frames, so render at minimal resolution.
            config.SIMULATOR.RGB_SENSOR.HEIGHT = 2
            config.SIMULATOR.RGB_SENSOR.WIDTH = 2
        if self.shell_args.task == "pointnav":
            config.TASK.SUCCESS_REWARD = 2
            config.TASK.SUCCESS_DISTANCE = 0.2
            config.TASK.COLLISION_REWARD = 0
            config.TASK.ENABLE_STOP_ACTION = False
            self.env_types.append(PointnavRLEnv)
        elif self.shell_args.task == "exploration":
            config.TASK.GRID_SIZE = 1
            assert config.TASK.GRID_SIZE >= config.SIMULATOR.FORWARD_STEP_SIZE
            config.TASK.NEW_GRID_CELL_REWARD = 0.1
            config.TASK.COLLISION_REWARD = 0  # -0.1
            config.TASK.RETURN_VISITED_GRID = self.shell_args.record_video
            config.ENVIRONMENT.MAX_EPISODE_STEPS = 250
            config.TASK.TOP_DOWN_MAP.DRAW_SOURCE_AND_TARGET = False
            config.TASK.NUM_EPISODES_BEFORE_JUMP = 5
            self.env_types.append(ExplorationRLEnv)
        elif self.shell_args.task == "flee":
            config.TASK.COLLISION_REWARD = 0  # -0.1
            config.ENVIRONMENT.MAX_EPISODE_STEPS = 250
            config.TASK.TOP_DOWN_MAP.DRAW_SOURCE_AND_TARGET = False
            config.TASK.NUM_EPISODES_BEFORE_JUMP = 5
            self.env_types.append(RunAwayRLEnv)
        else:
            raise NotImplementedError("Unknown task type")
        if self.shell_args.record_video:
            config.TASK.NUM_EPISODES_BEFORE_JUMP = -1
        config.TASK.STEP_SIZE = config.SIMULATOR.FORWARD_STEP_SIZE
        config.TASK.TOP_DOWN_MAP.MAX_EPISODE_STEPS = config.ENVIRONMENT.MAX_EPISODE_STEPS
        config.TASK.TOP_DOWN_MAP.MAP_RESOLUTION = 1250
        config.TASK.OBSERVE_BEST_NEXT_ACTION = self.shell_args.algo == "supervised"
        self.configs.append(config)

    if self.shell_args.debug:
        print("Config\n", self.configs[0])

    self.shell_args.cuda = not self.shell_args.no_cuda and torch.cuda.is_available()

    if self.shell_args.blind:
        decoder_output_info = []
    else:
        decoder_output_info = [("reconstruction", 3), ("depth", 1), ("surface_normals", 3)]

    if self.shell_args.encoder_network_type == "ShallowVisualEncoder":
        encoder_type = networks.ShallowVisualEncoder
    elif self.shell_args.encoder_network_type == "ResNetEncoder":
        encoder_type = networks.ResNetEncoder
    else:
        raise NotImplementedError("Unknown network type.")

    self.gym_action_space = gym.spaces.discrete.Discrete(len(ACTION_SPACE))
    target_vector_size = None
    if self.shell_args.task == "pointnav":
        # Pointnav conditions the policy on the 2D vector to the goal.
        target_vector_size = 2
    elif self.shell_args.task == "exploration" or self.shell_args.task == "flee":
        target_vector_size = 0
    self.agent = VisualPolicy(
        self.gym_action_space,
        base=networks.RLBaseWithVisualEncoder,
        base_kwargs=dict(
            encoder_type=encoder_type,
            decoder_output_info=decoder_output_info,
            recurrent=True,
            end_to_end=self.shell_args.end_to_end,
            hidden_size=256,
            target_vector_size=target_vector_size,
            action_size=len(ACTION_SPACE),
            gpu_ids=self.torch_devices,
            create_decoder=create_decoder,
            blind=self.shell_args.blind,
        ),
    )
    if self.shell_args.debug:
        print("actor critic", self.agent)
    self.agent.to(self.device)
    self.time_str = misc_util.get_time_str()

    visual_layers = self.agent.base.visual_encoder.module
    if self.shell_args.freeze_encoder_features:
        # Not necessary, but probably lets pytorch be more space efficient.
        for param in visual_layers.encoder.parameters():
            param.requires_grad = False
    if self.shell_args.freeze_visual_decoder_features:
        for attr in ("bridge", "decoder", "out"):
            if hasattr(visual_layers, attr):
                for param in getattr(visual_layers, attr).parameters():
                    param.requires_grad = False
        if getattr(visual_layers, "class_pred_layer", None) is not None:
            for param in visual_layers.class_pred_layer.parameters():
                param.requires_grad = False
    if self.shell_args.freeze_motion_decoder_features and self.shell_args.freeze_policy_decoder_features:
        for param in self.agent.base.visual_projection.parameters():
            param.requires_grad = False
    if self.shell_args.freeze_motion_decoder_features:
        for param in self.agent.base.egomotion_layer.parameters():
            param.requires_grad = False
        for param in self.agent.base.motion_model_layer.parameters():
            param.requires_grad = False
    if self.shell_args.freeze_policy_decoder_features:
        for param in self.agent.base.gru.parameters():
            param.requires_grad = False
        for param in self.agent.base.rl_layers.parameters():
            param.requires_grad = False
        for param in self.agent.base.critic_linear.parameters():
            param.requires_grad = False
        for param in self.agent.dist.parameters():
            param.requires_grad = False

    if self.shell_args.algo == "ppo":
        self.optimizer = optimizers.VisualPPO(
            self.agent,
            self.shell_args.clip_param,
            self.shell_args.ppo_epoch,
            self.shell_args.num_mini_batch,
            self.shell_args.value_loss_coef,
            self.shell_args.entropy_coef,
            lr=self.shell_args.lr,
            eps=self.shell_args.eps,
            max_grad_norm=self.shell_args.max_grad_norm,
        )
    elif self.shell_args.algo == "supervised":
        self.optimizer = optimizers.BehavioralCloningOptimizer(
            self.agent,
            self.shell_args.clip_param,
            self.shell_args.ppo_epoch,
            self.shell_args.num_mini_batch,
            self.shell_args.value_loss_coef,
            self.shell_args.entropy_coef,
            lr=self.shell_args.lr,
            eps=self.shell_args.eps,
        )
    else:
        raise NotImplementedError("No such algorithm")

    height = self.configs[0].SIMULATOR.RGB_SENSOR.HEIGHT
    width = self.configs[0].SIMULATOR.RGB_SENSOR.WIDTH
    self.observation_space = {
        "pointgoal": ((2,), np.dtype(np.float32)),
        "prev_action_one_hot": ((len(ACTION_SPACE),), np.dtype(np.float32)),
    }
    self.compute_surface_normals = self.shell_args.record_video or self.shell_args.update_encoder_features
    if self.shell_args.algo == "supervised":
        self.observation_space["best_next_action"] = ((len(ACTION_SPACE),), np.dtype(np.float32))
    if self.shell_args.update_encoder_features:
        self.observation_space["depth"] = ((1, height, width), np.dtype(np.float32))
        if self.compute_surface_normals:
            self.observation_space["surface_normals"] = ((3, height, width), np.dtype(np.float32))
    if not self.shell_args.end_to_end:
        self.observation_space["visual_encoder_features"] = (
            (self.agent.base.num_output_channels, 256 // 2 ** 5, 256 // 2 ** 5),
            np.dtype(np.float32),
        )

    # Send dummy batch through to allocate memory before vecenv
    print("Feeding dummy batch")
    dummy_start = time.time()
    self.agent.act(
        {
            "images": torch.rand(
                (
                    self.shell_args.num_processes,
                    3,
                    self.configs[0].SIMULATOR.RGB_SENSOR.HEIGHT,
                    self.configs[0].SIMULATOR.RGB_SENSOR.WIDTH,
                )
            ).to(self.device),
            "target_vector": torch.rand(self.shell_args.num_processes, target_vector_size).to(self.device),
            "prev_action_one_hot": torch.rand(self.shell_args.num_processes, self.gym_action_space.n).to(self.device),
        },
        torch.rand(self.shell_args.num_processes, self.agent.recurrent_hidden_state_size).to(self.device),
        torch.rand(self.shell_args.num_processes, 1).to(self.device),
    )
    print("Done feeding dummy batch %.3f" % (time.time() - dummy_start))

    self.start_iter = 0
    self.checkpoint_dir = os.path.join(
        self.shell_args.log_prefix, self.shell_args.checkpoint_dirname, self.time_str
    )
def main():
    torch_devices = [int(gpu_id.strip()) for gpu_id in args.pytorch_gpu_ids.split(",")]
    render_gpus = [int(gpu_id.strip()) for gpu_id in args.render_gpu_ids.split(",")]
    device = "cuda:" + str(torch_devices[0])

    decoder_output_info = [("reconstruction", 3), ("depth", 1), ("surface_normals", 3)]
    if USE_SEMANTIC:
        decoder_output_info.append(("semantic", 41))

    model = ShallowVisualEncoder(decoder_output_info)
    model = pt_util.get_data_parallel(model, torch_devices)
    model = pt_util.DummyScope(model, ["base", "visual_encoder"])
    model.to(device)
    print("Model constructed")
    print(model)

    train_transforms = transforms.Compose(
        [transforms.ToPILImage(), transforms.RandomHorizontalFlip(), transforms.RandomCrop(224)]
    )
    # Depth arrays are single-channel floats, so go through PIL.Image.fromarray
    # and back to numpy around the spatial augmentations.
    train_transforms_depth = transforms.Compose(
        [PIL.Image.fromarray, transforms.RandomHorizontalFlip(), transforms.RandomCrop(224), np.array]
    )
    train_transforms_semantic = transforms.Compose(
        [transforms.ToPILImage(), transforms.RandomHorizontalFlip(), transforms.RandomCrop(224)]
    )

    sensors = ["RGB_SENSOR", "DEPTH_SENSOR"] + (["SEMANTIC_SENSOR"] if USE_SEMANTIC else [])
    # The per-dataset branches differ only in the data path, so pick it here.
    if args.dataset == "suncg":
        data_path = "data/dumps/suncg/{split}/dataset_one_ep_per_scene.json.gz"
    elif args.dataset == "mp3d":
        data_path = "data/dumps/mp3d/{split}/dataset_one_ep_per_scene.json.gz"
    elif args.dataset == "gibson":
        data_path = "data/datasets/pointnav/gibson/v1/{split}/{split}.json.gz"
    else:
        raise NotImplementedError("No rule for this dataset.")
    data_train = HabitatImageGenerator(
        render_gpus,
        args.dataset,
        args.data_subset,
        data_path,
        images_before_reset=1000,
        sensors=sensors,
        transform=train_transforms,
        depth_transform=train_transforms_depth,
        semantic_transform=train_transforms_semantic,
    )
    data_test = HabitatImageGenerator(
        render_gpus,
        args.dataset,
        "val",
        data_path,
        images_before_reset=1000,
        sensors=sensors,
    )
    print("Num train images", len(data_train))
    print("Num val images", len(data_test))
    print("Using device", device)
    print("num cpus:", args.num_processes)

    train_loader = torch.utils.data.DataLoader(
        data_train,
        batch_size=BATCH_SIZE,
        num_workers=args.num_processes,
        worker_init_fn=data_train.worker_init_fn,
        shuffle=False,
        pin_memory=True,
    )
    test_loader = torch.utils.data.DataLoader(
        data_test,
        batch_size=TEST_BATCH_SIZE,
        num_workers=len(render_gpus) if args.num_processes > 0 else 0,
        worker_init_fn=data_test.worker_init_fn,
        shuffle=False,
        pin_memory=True,
    )

    log_prefix = args.log_prefix
    time_str = misc_util.get_time_str()
    checkpoint_dir = os.path.join(log_prefix, args.checkpoint_dirname, time_str)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    start_iter = 0
    if args.load_model:
        start_iter = pt_util.restore_from_folder(model, os.path.join(log_prefix, args.checkpoint_dirname, "*"))

    train_logger = None
    test_logger = None
    if args.tensorboard:
        train_logger = tensorboard_logger.Logger(
            os.path.join(log_prefix, args.tensorboard_dirname, time_str + "_train")
        )
        test_logger = tensorboard_logger.Logger(
            os.path.join(log_prefix, args.tensorboard_dirname, time_str + "_test")
        )

    total_num_steps = start_iter
    if args.save_checkpoints and not args.no_weight_update:
        pt_util.save(model, checkpoint_dir, num_to_keep=5, iteration=total_num_steps)
    # Evaluate once before training to log the starting point.
    evaluate_model(model, device, test_loader, total_num_steps, test_logger, decoder_output_info)
    for epoch in range(0, EPOCHS + 1):
        total_num_steps = train_model(
            model, device, train_loader, optimizer, total_num_steps, train_logger, decoder_output_info, checkpoint_dir
        )
        evaluate_model(model, device, test_loader, total_num_steps, test_logger, decoder_output_info)
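
# Entry point (assumed; the original script presumably runs main() directly):
if __name__ == "__main__":
    main()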
import os

import numpy as np
import torch
from dg_util.python_utils import misc_util
from torch import nn

np.set_printoptions(precision=4)
torch.set_printoptions(precision=4, sci_mode=False)


def batch_norm_layer(channels):
    return nn.BatchNorm2d(channels)


def nonlinearity():
    return nn.ReLU(inplace=True)


NONLINEARITY = nonlinearity
NORM_LAYER = batch_norm_layer
TIME_STR = misc_util.get_time_str()
BASE_LOG_DIR = "logs"
CHECK_FOR_NEW_DATA = False
# ImageNet channel statistics, scaled to the [0, 255] pixel range.
IMAGENET_MEAN = np.array([0.485, 0.456, 0.406], dtype=np.float32) * 255
IMAGENET_STD = np.array([0.229, 0.224, 0.225], dtype=np.float32) * 255
COOKIE_PATH = os.path.join(os.path.dirname(__file__), "youtube_scrape", "cookies.txt")
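
# Usage sketch (illustrative): normalizing a raw uint8 HWC RGB array `img`
# (hypothetical variable) with the constants above:
#   normalized = (img.astype(np.float32) - IMAGENET_MEAN) / IMAGENET_STD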