def load_model(self, model_path, args):
    if args.model == 'NON_ADAPTIVE_A3C':
        self.model = BaseModel(args)
    elif args.model == 'GCN':
        self.model = GCN(args)
    else:
        self.model = SAVN(args)
    saved_state = torch.load(model_path, map_location=lambda storage, loc: storage)
    self.model.load_state_dict(saved_state)
    self.model_options = ModelOptions()
    self.model_options.params = get_params(self.model, args.gpu_id)

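# NOTE: a hedged sketch of the get_params() helper assumed above: copy the model's named
# parameters into an OrderedDict of standalone tensors (on the given GPU when gpu_id >= 0,
# with requires_grad enabled) so that later forward passes and SGD_step() updates can run
# against an explicit parameter set instead of the module's own weights. The real helper
# in this repository may differ in detail.
from collections import OrderedDict

def get_params_sketch(model, gpu_id):
    theta = OrderedDict()
    for name, param in model.named_parameters():
        copied = param.detach().clone()
        if gpu_id >= 0:
            copied = copied.cuda(gpu_id)
        copied.requires_grad_(True)
        theta[name] = copied
    return theta
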
def nonadaptivea3c_train(
    rank,
    args,
    create_shared_model,
    shared_model,
    initialize_agent,
    optimizer,
    res_queue,
    end_flag,
):
    glove = Glove(args.glove_file)
    scenes, possible_targets, targets = get_data(args.scene_types, args.train_scenes)

    random.seed(args.seed + rank)
    idx = list(range(len(args.scene_types)))
    random.shuffle(idx)

    setproctitle.setproctitle("Training Agent: {}".format(rank))

    gpu_id = args.gpu_ids[rank % len(args.gpu_ids)]
    import torch

    torch.cuda.set_device(gpu_id)
    torch.manual_seed(args.seed + rank)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed + rank)

    player = initialize_agent(create_shared_model, args, rank, gpu_id=gpu_id)
    compute_grad = not isinstance(player, RandomNavigationAgent)

    model_options = ModelOptions()

    j = 0
    while not end_flag.value:
        # Get a new episode.
        total_reward = 0
        player.eps_len = 0
        new_episode(args, player, scenes[idx[j]], possible_targets, targets[idx[j]], glove=glove)
        player_start_time = time.time()

        # Train on the new episode.
        while not player.done:
            # Make sure model is up to date.
            player.sync_with_shared(shared_model)
            # Run episode for num_steps or until player is done.
            total_reward = run_episode(player, args, total_reward, model_options, True)
            # Compute the loss.
            loss = compute_loss(args, player, gpu_id, model_options)
            if compute_grad:
                # Compute gradient.
                player.model.zero_grad()
                loss["total_loss"].backward()
                torch.nn.utils.clip_grad_norm_(player.model.parameters(), 100.0)
                # Transfer gradient to shared model and step optimizer.
                transfer_gradient_from_player_to_shared(player, shared_model, gpu_id)
                optimizer.step()
            # Clear actions and repackage hidden.
            if not player.done:
                reset_player(player)

        for k in loss:
            loss[k] = loss[k].item()

        end_episode(
            player,
            res_queue,
            title=args.scene_types[idx[j]],
            total_time=time.time() - player_start_time,
            total_reward=total_reward,
        )
        reset_player(player)

        j = (j + 1) % len(args.scene_types)

    player.exit()

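# NOTE: a hedged sketch of the A3C gradient hand-off performed by
# transfer_gradient_from_player_to_shared() above: copy each local gradient onto the
# corresponding shared parameter's .grad (moving it to the CPU when the shared model
# lives in CPU shared memory) so that optimizer.step() on the shared model applies this
# worker's update. The repository's helper may guard against clobbering existing grads.
def transfer_gradient_from_player_to_shared_sketch(player, shared_model, gpu_id):
    for local_param, shared_param in zip(player.model.parameters(),
                                         shared_model.parameters()):
        if local_param.grad is None:
            continue
        grad = local_param.grad
        if gpu_id >= 0:
            grad = grad.cpu()
        shared_param._grad = grad
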
def nonadaptivea3c_val(
    rank,
    args,
    model_to_open,
    model_create_fn,
    initialize_agent,
    res_queue,
    max_count,
    scene_type,
):
    glove = Glove(args.glove_file)
    scenes, possible_targets, targets = get_data(args.scene_types, args.val_scenes)
    num = name_to_num(scene_type)
    scenes = scenes[num]
    targets = targets[num]

    if scene_type == "living_room":
        args.max_episode_length = 200
    else:
        args.max_episode_length = 100

    setproctitle.setproctitle("Agent: {}".format(rank))

    gpu_id = args.gpu_ids[rank % len(args.gpu_ids)]
    torch.manual_seed(args.seed + rank)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed + rank)

    shared_model = model_create_fn(args)
    if model_to_open != "":
        saved_state = torch.load(
            model_to_open, map_location=lambda storage, loc: storage
        )
        shared_model.load_state_dict(saved_state['model'])

    player = initialize_agent(model_create_fn, args, rank, gpu_id=gpu_id)
    player.sync_with_shared(shared_model)

    count = 0
    model_options = ModelOptions()

    j = 0
    while count < max_count:
        # Get a new episode.
        total_reward = 0
        player.eps_len = 0
        new_episode(args, player, scenes, possible_targets, targets, glove=glove)
        player_start_state = copy.deepcopy(player.environment.controller.state)
        player_start_time = time.time()

        # Evaluate on the new episode.
        while not player.done:
            # Make sure model is up to date.
            player.sync_with_shared(shared_model)
            # Run episode for num_steps or until player is done.
            total_reward = run_episode(player, args, total_reward, model_options, False)
            # Compute the loss.
            loss = compute_loss(args, player, gpu_id, model_options)
            if not player.done:
                reset_player(player)

        for k in loss:
            loss[k] = loss[k].item()

        spl, best_path_length = compute_spl(player, player_start_state)
        bucketed_spl = get_bucketed_metrics(spl, best_path_length, player.success)

        end_episode(
            player,
            res_queue,
            total_time=time.time() - player_start_time,
            total_reward=total_reward,
            spl=spl,
            **bucketed_spl,
        )

        count += 1
        reset_player(player)
        j = (j + 1) % len(args.scene_types)

    player.exit()
    res_queue.put({"END": True})

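# NOTE: a hedged sketch of the SPL ("Success weighted by Path Length") metric that
# compute_spl() is assumed to implement, following Anderson et al. (2018):
#     SPL = S * L / max(P, L)
# where S is 1 for a successful episode and 0 otherwise, L is the shortest-path length
# from player_start_state to the target, and P is the number of steps the agent took.
# The repository's helper also returns L so callers can bucket results by difficulty.
def spl_sketch(success, best_path_length, taken_path_length):
    if best_path_length <= 0:
        return 0.0
    return float(success) * best_path_length / max(taken_path_length, best_path_length)
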
def savn_val(
    rank,
    args,
    model_to_open,
    model_create_fn,
    initialize_agent,
    res_queue,
    max_count,
    scene_type,
    glove_file=None,
    img_file=None,
):
    # glove = Glove(args.glove_file)
    scenes, possible_targets, targets = get_data(args.scene_types, args.val_scenes)
    num = name_to_num(scene_type)
    scenes = scenes[num]
    targets = targets[num]

    if scene_type == "living_room":
        args.max_episode_length = 200
    else:
        args.max_episode_length = 100

    setproctitle.setproctitle("Val Agent: {}".format(rank))

    gpu_id = args.gpu_ids[rank % len(args.gpu_ids)]
    import torch

    torch.cuda.set_device(gpu_id)
    torch.manual_seed(args.seed + rank)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed + rank)

    shared_model = model_create_fn(args)
    if model_to_open is not None:
        saved_state = torch.load(model_to_open, map_location=lambda storage, loc: storage)
        shared_model.load_state_dict(saved_state)

    player = initialize_agent(model_create_fn, args, rank, gpu_id=gpu_id)
    player.sync_with_shared(shared_model)

    count = 0
    player_actions = {}
    model_options = ModelOptions()

    while count < max_count:
        count += 1
        start_time = time.time()
        new_episode(args, player, scenes, possible_targets, targets,
                    glove=glove_file, img_file=None)
        player_start_state = copy.deepcopy(player.environment.controller.state)
        if args.verbose:
            print(player_start_state)
        player.episode.exploring = True
        total_reward = 0
        player.eps_len = 0

        # theta <- shared_initialization
        params_list = [get_params(shared_model, gpu_id)]
        model_options.params = params_list[-1]
        loss_dict = {}
        reward_dict = {}
        episode_num = 0
        num_gradients = 0

        player_actions['scene'] = player.environment.scene_name
        player_actions['target_object'] = player.episode.task_data[0]
        player_actions['positions'] = []
        player_actions['positions'].append(str(player_start_state))

        while True:
            total_reward = run_episode(player, args, total_reward, model_options, False)
            player_actions['positions'].append(str(player.environment.controller.state))

            if player.done:
                break

            if args.gradient_limit < 0 or episode_num < args.gradient_limit:
                num_gradients += 1

                # Compute the loss.
                learned_loss = compute_learned_loss(args, player, gpu_id, model_options)

                if args.verbose:
                    print("inner gradient")
                inner_gradient = torch.autograd.grad(
                    learned_loss["learned_loss"],
                    [v for _, v in params_list[episode_num].items()],
                    create_graph=True,
                    retain_graph=True,
                    allow_unused=True,
                )

                params_list.append(
                    SGD_step(params_list[episode_num], inner_gradient, args.inner_lr))
                model_options.params = params_list[-1]

                reset_player(player)
                episode_num += 1

                for k, v in learned_loss.items():
                    loss_dict["{}/{:d}".format(k, episode_num)] = v.item()

        loss = compute_loss(args, player, gpu_id, model_options)
        player_actions['success'] = player.success
        for k, v in loss.items():
            loss_dict[k] = v.item()
        reward_dict["total_reward"] = total_reward

        spl, best_path_length = compute_spl(player, player_start_state)
        bucketed_spl = get_bucketed_metrics(spl, best_path_length, player.success,
                                            player.actions[-1], player.arrive)

        if args.record_route:
            with open('/home/duhm/Code/savn_deployment/players_action_test.json', 'a') as write_file:
                json.dump(player_actions, write_file)

        end_episode(
            player,
            res_queue,
            total_time=time.time() - start_time,
            spl=spl,
            **reward_dict,
            **bucketed_spl,
        )
        reset_player(player)

    player.exit()
    res_queue.put({"END": True})

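# NOTE: a hedged sketch of the SGD_step() helper driving the MAML-style inner update
# above: build a new parameter dictionary theta' = theta - inner_lr * grad while keeping
# the graph intact (the inner gradient was taken with create_graph=True), so a later
# meta-gradient can still flow back to the initial parameters. Entries whose gradient is
# None (allow_unused=True) are carried over unchanged.
from collections import OrderedDict

def SGD_step_sketch(theta, gradients, inner_lr):
    new_theta = OrderedDict()
    for (name, param), grad in zip(theta.items(), gradients):
        new_theta[name] = param if grad is None else param - inner_lr * grad
    return new_theta
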
def savn_train(
    rank,
    args,
    create_shared_model,
    shared_model,
    initialize_agent,
    optimizer,
    res_queue,
    end_flag,
):
    glove = Glove(args.glove_file)
    scenes, possible_targets, targets = get_data(args.scene_types, args.train_scenes)

    random.seed(args.seed + rank)
    idx = list(range(len(args.scene_types)))
    random.shuffle(idx)

    setproctitle.setproctitle("Training Agent: {}".format(rank))

    gpu_id = args.gpu_ids[rank % len(args.gpu_ids)]
    import torch

    torch.cuda.set_device(gpu_id)
    torch.manual_seed(args.seed + rank)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed + rank)

    player = initialize_agent(create_shared_model, args, rank, gpu_id=gpu_id)

    model_options = ModelOptions()

    j = 0
    while not end_flag.value:
        start_time = time.time()
        new_episode(args, player, scenes[idx[j]], possible_targets, targets[idx[j]], glove=glove)
        player.episode.exploring = True
        total_reward = 0
        player.eps_len = 0

        # theta <- shared_initialization
        params_list = [get_params(shared_model, gpu_id)]
        model_options.params = params_list[-1]
        loss_dict = {}
        reward_dict = {}
        episode_num = 0
        num_gradients = 0

        # Accumulate loss over all meta_train episodes.
        while True:
            # Run episode for k steps or until it is done or has made a mistake
            # (if dynamic adapt is true).
            if args.verbose:
                print("New inner step")
            total_reward = run_episode(player, args, total_reward, model_options, True)

            if player.done:
                break

            if args.gradient_limit < 0 or episode_num < args.gradient_limit:
                num_gradients += 1

                # Compute the loss.
                learned_loss = compute_learned_loss(args, player, gpu_id, model_options)

                if args.verbose:
                    print("inner gradient")
                inner_gradient = torch.autograd.grad(
                    learned_loss["learned_loss"],
                    [v for _, v in params_list[episode_num].items()],
                    create_graph=True,
                    retain_graph=True,
                    allow_unused=True,
                )

                params_list.append(
                    SGD_step(params_list[episode_num], inner_gradient, args.inner_lr))
                model_options.params = params_list[-1]

                # reset_player(player)
                episode_num += 1

                for k, v in learned_loss.items():
                    loss_dict["{}/{:d}".format(k, episode_num)] = v.item()

        loss = compute_loss(args, player, gpu_id, model_options)
        for k, v in loss.items():
            loss_dict[k] = v.item()
        reward_dict["total_reward"] = total_reward

        if args.verbose:
            print("meta gradient")

        # Compute the meta_gradient, i.e. differentiate w.r.t. theta.
        meta_gradient = torch.autograd.grad(
            loss["total_loss"],
            [v for _, v in params_list[0].items()],
            allow_unused=True,
        )

        end_episode(
            player,
            res_queue,
            title=args.scene_types[idx[j]],
            episode_num=0,
            total_time=time.time() - start_time,
            total_reward=total_reward,
        )

        # Copy the meta_gradient to shared_model and step.
        transfer_gradient_to_shared(meta_gradient, shared_model, gpu_id)
        optimizer.step()

        reset_player(player)
        j = (j + 1) % len(args.scene_types)

    player.exit()

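# NOTE: a hedged sketch of transfer_gradient_to_shared() as used above. torch.autograd.grad()
# returns a tuple ordered like params_list[0], which itself was built from
# shared_model.named_parameters(), so the meta-gradient lines up with
# shared_model.parameters(); each entry is written into the shared parameter's .grad
# (moved to the CPU if the shared model lives there) before optimizer.step(). Entries
# that are None (allow_unused=True) are skipped.
def transfer_gradient_to_shared_sketch(meta_gradient, shared_model, gpu_id):
    for grad, shared_param in zip(meta_gradient, shared_model.parameters()):
        if grad is None:
            continue
        shared_param._grad = grad.cpu() if gpu_id >= 0 else grad
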
def a3c_val(
    rank,
    args,
    model_to_open,
    model_create_fn,
    initialize_agent,
    res_queue,
    max_count,
    scene_type,
    scenes,
):
    targets = AI2THOR_TARGET_CLASSES[args.num_category]

    if scene_type == "living_room":
        args.max_episode_length = 200
    else:
        args.max_episode_length = 100

    setproctitle.setproctitle("Agent: {}".format(rank))

    gpu_id = args.gpu_ids[rank % len(args.gpu_ids)]
    torch.manual_seed(args.seed + rank)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed + rank)

    shared_model = model_create_fn(args)
    if model_to_open != "":
        saved_state = torch.load(
            model_to_open, map_location=lambda storage, loc: storage
        )
        shared_model.load_state_dict(saved_state)

    player = initialize_agent(model_create_fn, args, rank, scenes, targets, gpu_id=gpu_id)
    player.sync_with_shared(shared_model)

    count = 0
    model_options = ModelOptions()

    while count < max_count:
        total_reward = 0
        player.eps_len = 0
        new_episode(args, player)
        player_start_state = copy.deepcopy(player.environment.controller.state)
        player_start_time = time.time()

        while not player.done:
            player.sync_with_shared(shared_model)
            total_reward = run_episode(player, args, total_reward, model_options, False, shared_model)
            if not player.done:
                reset_player(player)

        spl, best_path_length = compute_spl(player, player_start_state)
        bucketed_spl = get_bucketed_metrics(spl, best_path_length, player.success)

        end_episode(
            player,
            res_queue,
            total_time=time.time() - player_start_time,
            total_reward=total_reward,
            spl=spl,
            **bucketed_spl,
        )

        count += 1
        reset_player(player)

    player.exit()
    res_queue.put({"END": True})

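# NOTE: a hedged sketch of the get_bucketed_metrics() helper used by the validation
# workers above (its 3-argument form; the savn_val variant also passes the last action
# and an arrival flag). It is assumed to report success and SPL only for episodes whose
# optimal path is at least a given number of steps, so trivially short episodes do not
# inflate the averages. The bucket thresholds below are an assumption.
def get_bucketed_metrics_sketch(spl, best_path_length, success):
    out = {}
    for min_len in [1, 5]:
        if best_path_length >= min_len:
            out["GreaterThan/{}/success".format(min_len)] = success
            out["GreaterThan/{}/spl".format(min_len)] = spl
    return out
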
def nonadaptivea3c_train(
    rank,
    args,
    create_shared_model,
    shared_model,
    initialize_agent,
    optimizer,
    res_queue,
    end_flag,
    global_ep,
):
    glove = None
    protos = None
    pre_metadata = None
    curriculum_meta = None
    scene_types = args.scene_types

    if args.glove_file:
        glove = Glove(args.glove_file)
    if args.proto_file:
        protos = Prototype(args.proto_file)

    if args.data_source == "ithor":
        from datasets.ithor_data import get_data
        scenes, possible_targets, targets = get_data(scene_types, args.train_scenes)
    elif args.data_source == "robothor":
        from datasets.robothor_data import get_data
        # If pinned_scene is set to True, pin this worker to one scene type and pre-load
        # all metadata for the controller constructed in new_episode().
        if args.pinned_scene:
            # TODO: design a flexible scene allocating strategy
            scene_types = [scene_types[rank % len(scene_types)]]
            pre_metadata = preload_metadata(args, scene_types)
        scenes, possible_targets, targets = get_data(scene_types)
        if args.curriculum_learning:
            curriculum_meta = get_curriculum_meta(args, scenes)

    random.seed(args.seed + rank)
    idx = list(range(len(scene_types)))
    random.shuffle(idx)

    setproctitle.setproctitle("Training Agent: {}".format(rank))

    gpu_id = args.gpu_ids[rank % len(args.gpu_ids)]
    import torch

    torch.cuda.set_device(gpu_id)
    torch.manual_seed(args.seed + rank)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed + rank)

    player = initialize_agent(create_shared_model, args, rank, gpu_id=gpu_id)
    compute_grad = not isinstance(player, RandomNavigationAgent)

    model_options = ModelOptions()

    j = 0
    while not end_flag.value:
        # Get a new episode.
        total_reward = 0
        player.eps_len = 0
        scene = new_episode(args, player, scenes[idx[j]], possible_targets, targets[idx[j]],
                            glove=glove, protos=protos, pre_metadata=pre_metadata,
                            curriculum_meta=curriculum_meta)
        player_start_time = time.time()

        # Train on the new episode.
        while not player.done:
            # Make sure model is up to date.
            player.sync_with_shared(shared_model)
            # Run episode for num_steps or until player is done.
            total_reward = run_episode(player, args, total_reward, model_options, True)

            # Plot the trajectory, by wuxiaodong.
            if args.demo_trajectory and global_ep.value % args.demo_trajectory_freq == 0:
                trajectory_pil = get_trajectory(
                    scene,
                    [str(loc) for loc in player.episode.episode_trajectories],
                    birdview_root='./demo_robothor/data/birdview/',
                    init_loc_str=player.episode.init_pos_str,
                    target_loc_str=player.episode.target_pos_str,
                    actions=player.episode.actions_taken,
                    success=player.success,
                    target_name=player.episode.target_object)
                demo_out_dir = os.path.join(args.log_dir, '../output_trajectory', args.title)
                if not os.path.exists(demo_out_dir):
                    os.makedirs(demo_out_dir)
                demo_name = '{}_init_{}_target_{}_iter{}.png'.format(
                    player.episode.object_type,
                    player.episode.init_pos_str,
                    player.episode.target_pos_str,
                    global_ep.value,
                )
                trajectory_pil.save(os.path.join(demo_out_dir, demo_name))
                print('plotting {}'.format(demo_name))

            # Compute the loss.
            loss = compute_loss(args, player, gpu_id, model_options)
            if compute_grad:
                # Compute gradient.
                player.model.zero_grad()
                loss["total_loss"].backward()
                torch.nn.utils.clip_grad_norm_(player.model.parameters(), 100.0)
                # Transfer gradient to shared model and step optimizer.
                transfer_gradient_from_player_to_shared(player, shared_model, gpu_id)
                optimizer.step()
            # Clear actions and repackage hidden.
            if not player.done:
                reset_player(player)

        for k in loss:
            loss[k] = loss[k].item()

        end_episode(
            player,
            res_queue,
            title=scene_types[idx[j]],
            total_time=time.time() - player_start_time,
            total_reward=total_reward,
            policy_loss=loss['policy_loss'],
            value_loss=loss['value_loss'],
        )
        reset_player(player)

        j = (j + 1) % len(scene_types)

    player.exit()

def nonadaptivea3c_val(
    rank,
    args,
    model_to_open,
    model_create_fn,
    initialize_agent,
    res_queue,
    max_count,
    scene_type,
):
    glove = None
    protos = None
    pre_metadata = None
    curriculum_meta = None
    scene_types = [scene_type]
    offline_shortest_data = None

    if args.glove_file:
        glove = Glove(args.glove_file)
    if args.proto_file:
        protos = Prototype(args.proto_file)

    if args.data_source == "ithor":
        from datasets.ithor_data import get_data, name_to_num
        scenes, possible_targets, targets = get_data(scene_types, args.val_scenes)
        num = name_to_num(scene_type)
        scenes = scenes[0]
        targets = targets[0]
    elif args.data_source == "robothor":
        from datasets.robothor_data import get_data
        # TODO: design a flexible scene allocating strategy
        pre_metadata = preload_metadata(args, scene_types)
        scenes, possible_targets, targets = get_data(scene_types)
        scenes = scenes[0]
        targets = targets[0]
        if args.curriculum_learning:
            curriculum_meta = get_curriculum_meta(args, scenes)
        if args.offline_shortest_data:
            offline_shortest_data = load_offline_shortest_path_data(args, scenes)

    setproctitle.setproctitle("Val Agent: {}".format(rank))

    gpu_id = args.gpu_ids[rank % len(args.gpu_ids)]
    torch.manual_seed(args.seed + rank)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed + rank)

    shared_model = model_create_fn(args)
    if model_to_open != "":
        saved_state = torch.load(model_to_open, map_location=lambda storage, loc: storage)
        shared_model.load_state_dict(saved_state)

    player = initialize_agent(model_create_fn, args, rank, gpu_id=gpu_id)
    player.sync_with_shared(shared_model)

    count = 0
    model_options = ModelOptions()

    while count < max_count:
        # Get a new episode.
        total_reward = 0
        player.eps_len = 0
        scene = new_episode(args, player, scenes, possible_targets, targets,
                            glove=glove, protos=protos, pre_metadata=pre_metadata,
                            curriculum_meta=curriculum_meta)
        if scene is None:
            # Iteration stopped.
            break
        player_start_state = copy.deepcopy(player.environment.controller.state)
        player_start_time = time.time()

        # Evaluate on the new episode.
        while not player.done:
            # Make sure model is up to date.
            # player.sync_with_shared(shared_model)
            # Run episode for num_steps or until player is done.
            total_reward = run_episode(player, args, total_reward, model_options, False)
            # Compute the loss.
            # loss = compute_loss(args, player, gpu_id, model_options)
            if not player.done:
                reset_player(player)

        # for k in loss:
        #     loss[k] = loss[k].item()

        if offline_shortest_data:
            # Assumes data_source == "robothor" and curriculum_learning is True.
            scene = player.environment.scene_name
            episode_id = player.episode.episode_id
            best_path_length = offline_shortest_data[scene][episode_id]
            spl = player.success * (best_path_length / float(player.eps_len))
        else:
            spl, best_path_length = compute_spl(player, player_start_state)

        bucketed_spl = get_bucketed_metrics(spl, best_path_length, player.success)

        if args.curriculum_learning:
            end_episode(
                player,
                res_queue,
                total_time=time.time() - player_start_time,
                total_reward=total_reward,
                spl=spl,
                **bucketed_spl,
                scene_type=scene_type,
                difficulty=player.episode.difficulty,
            )
        else:
            end_episode(
                player,
                res_queue,
                total_time=time.time() - player_start_time,
                total_reward=total_reward,
                spl=spl,
                **bucketed_spl,
                scene_type=scene_type,
            )

        count += 1
        reset_player(player)

    player.exit()
    res_queue.put({
        "END": True,
        "scene_type": scene_type,
        "total_episodes": count,
    })

def a3c_train(
    rank,
    args,
    create_shared_model,
    shared_model,
    initialize_agent,
    optimizer,
    res_queue,
    end_flag,
    scenes,
):
    setproctitle.setproctitle('Training Agent: {}'.format(rank))

    targets = AI2THOR_TARGET_CLASSES[args.num_category]

    random.seed(args.seed + rank)

    gpu_id = args.gpu_ids[rank % len(args.gpu_ids)]
    torch.cuda.set_device(gpu_id)
    torch.manual_seed(args.seed + rank)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed + rank)

    player = initialize_agent(create_shared_model, args, rank, scenes, targets, gpu_id=gpu_id)
    compute_grad = not isinstance(player, RandomNavigationAgent)

    model_options = ModelOptions()

    episode_num = 0
    while not end_flag.value:
        total_reward = 0
        player.eps_len = 0
        player.episode.episode_times = episode_num
        new_episode(args, player)
        player_start_time = time.time()

        while not player.done:
            player.sync_with_shared(shared_model)
            total_reward = run_episode(player, args, total_reward, model_options, True)
            loss = compute_loss(args, player, gpu_id, model_options)
            if compute_grad and loss['total_loss'] != 0:
                player.model.zero_grad()
                loss['total_loss'].backward()
                torch.nn.utils.clip_grad_norm_(player.model.parameters(), 100.0)
                transfer_gradient_from_player_to_shared(player, shared_model, gpu_id)
                optimizer.step()
            if not player.done:
                reset_player(player)

        for k in loss:
            loss[k] = loss[k].item()

        end_episode(
            player,
            res_queue,
            title=num_to_name(int(player.episode.scene[9:])),
            total_time=time.time() - player_start_time,
            total_reward=total_reward,
        )
        reset_player(player)

        episode_num = (episode_num + 1) % len(args.scene_types)

    player.exit()

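# NOTE: a hedged sketch of the num_to_name() mapping assumed above. AI2-THOR floor plans
# are numbered by room type (1-30 kitchens, 201-230 living rooms, 301-330 bedrooms,
# 401-430 bathrooms), and player.episode.scene[9:] strips the "FloorPlan" prefix to leave
# that number. The repository's helper may use different labels.
def num_to_name_sketch(scene_num):
    if scene_num < 100:
        return "kitchen"
    if scene_num < 300:
        return "living_room"
    if scene_num < 400:
        return "bedroom"
    return "bathroom"
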
def nonadaptivea3c_train(
    rank,
    args,
    create_shared_model,
    shared_model,
    initialize_agent,
    optimizer,
    res_queue,
    end_flag,
):
    glove = Glove(args.glove_file)
    scenes, possible_targets, targets = get_data(args.scene_types, args.train_scenes)

    random.seed(args.seed + rank)
    idx = list(range(len(args.scene_types)))
    random.shuffle(idx)

    setproctitle.setproctitle("Training Agent: {}".format(rank))

    gpu_id = args.gpu_ids[rank % len(args.gpu_ids)]
    import torch

    torch.cuda.set_device(gpu_id)
    torch.manual_seed(args.seed + rank)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed + rank)

    player = initialize_agent(create_shared_model, args, rank, gpu_id=gpu_id)
    compute_grad = not isinstance(player, RandomNavigationAgent)

    model_options = ModelOptions()

    j = 0
    print('Right before while loop')
    while not end_flag.value:
        # Get a new episode.
        total_reward = 0
        player.eps_len = 0
        new_episode(args, player, scenes[idx[j]], possible_targets, targets[idx[j]], glove=glove)
        player_start_time = time.time()

        # Train on the new episode.
        while not player.done:
            # Make sure model is up to date.
            player.sync_with_shared(shared_model)
            # Run episode for num_steps or until player is done.
            total_reward = run_episode(player, args, total_reward, model_options, True)
            # Compute the loss.
            loss = compute_loss(args, player, gpu_id, model_options)
            if compute_grad:
                # Compute gradient.
                player.model.zero_grad()
                loss["total_loss"].backward()
                torch.nn.utils.clip_grad_norm_(player.model.parameters(), 100.0)
                # Transfer gradient to shared model and step optimizer.
                transfer_gradient_from_player_to_shared(player, shared_model, gpu_id)
                optimizer.step()
            # Clear actions and repackage hidden.
            if not player.done:
                reset_player(player)

        for k in loss:
            loss[k] = loss[k].item()

        end_episode(
            player,
            res_queue,
            title=args.scene_types[idx[j]],
            total_time=time.time() - player_start_time,
            total_reward=total_reward,
        )
        reset_player(player)

        j = (j + 1) % len(args.scene_types)
        print('End of while loop')

    player.exit()
