def new_episode(
    self,
    args,
    scenes,
    possible_targets=None,
    targets=None,
    keep_obj=False,
    glove=None,
):
    # Reset per-episode counters and frame buffers.
    self.done_count = 0
    self.duplicate_count = 0
    self.failed_action_count = 0
    self.prev_frame = None
    self.current_frame = None

    # Lazily load the pre-computed test/val episode list the first time a new
    # episode is requested; the split file is chosen by scene type (derived
    # from the FloorPlan number) and task type (test or val).
    if self.file is None:
        sample_scene = scenes[0]
        if "physics" in sample_scene:
            scene_num = sample_scene[len("FloorPlan"):-len("_physics")]
        else:
            scene_num = sample_scene[len("FloorPlan"):]
        scene_num = int(scene_num)
        scene_type = num_to_name(scene_num)
        task_type = args.test_or_val
        self.file = open(
            "test_val_split/" + scene_type + "_" + task_type + ".pkl", "rb"
        )
        self.all_data = pickle.load(self.file)
        self.file.close()
        self.all_data_enumerator = 0

    # Replay the stored episodes in order.
    episode = self.all_data[self.all_data_enumerator]
    self.all_data_enumerator += 1
    self._new_episode(args, episode)
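
# A minimal sketch of the num_to_name helper referenced above, included only to
# make the split-file naming concrete. It assumes the standard AI2-THOR FloorPlan
# numbering (1-30 kitchens, 201-230 living rooms, 301-330 bedrooms, 401-430
# bathrooms) and these exact scene-type strings; the repo's actual helper may differ.
def num_to_name(scene_num):
    # Map a FloorPlan number to the scene-type string used in
    # "test_val_split/<scene_type>_<task_type>.pkl".
    if scene_num < 100:
        return "kitchen"
    elif scene_num < 300:
        return "living_room"
    elif scene_num < 400:
        return "bedroom"
    else:
        return "bathroom"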
def a3c_train(
    rank,
    args,
    create_shared_model,
    shared_model,
    initialize_agent,
    optimizer,
    res_queue,
    end_flag,
    scenes,
):
    setproctitle.setproctitle('Training Agent: {}'.format(rank))

    targets = AI2THOR_TARGET_CLASSES[args.num_category]

    # Seed each worker differently and pin it to one of the available GPUs.
    random.seed(args.seed + rank)
    gpu_id = args.gpu_ids[rank % len(args.gpu_ids)]
    torch.cuda.set_device(gpu_id)
    torch.manual_seed(args.seed + rank)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed + rank)

    player = initialize_agent(
        create_shared_model, args, rank, scenes, targets, gpu_id=gpu_id
    )
    # A random agent produces no gradients, so skip the backward/update path for it.
    compute_grad = not isinstance(player, RandomNavigationAgent)

    model_options = ModelOptions()

    episode_num = 0
    while not end_flag.value:
        total_reward = 0
        player.eps_len = 0
        player.episode.episode_times = episode_num
        new_episode(args, player)
        player_start_time = time.time()

        while not player.done:
            # Pull the latest shared weights, roll out a segment, then push
            # the resulting gradients back to the shared model (A3C update).
            player.sync_with_shared(shared_model)
            total_reward = run_episode(player, args, total_reward, model_options, True)

            loss = compute_loss(args, player, gpu_id, model_options)
            if compute_grad and loss['total_loss'] != 0:
                player.model.zero_grad()
                loss['total_loss'].backward()
                torch.nn.utils.clip_grad_norm_(player.model.parameters(), 100.0)
                transfer_gradient_from_player_to_shared(player, shared_model, gpu_id)
                optimizer.step()
            if not player.done:
                reset_player(player)

        for k in loss:
            loss[k] = loss[k].item()

        end_episode(
            player,
            res_queue,
            title=num_to_name(int(player.episode.scene[9:])),
            total_time=time.time() - player_start_time,
            total_reward=total_reward,
        )

        reset_player(player)

        episode_num = (episode_num + 1) % len(args.scene_types)

    player.exit()
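
# A minimal sketch of the A3C gradient hand-off assumed in the loop above: after
# the local backward pass, each worker copies its gradients onto the shared model
# so the shared optimizer step updates the parameters seen by every process. The
# function name matches the call above, but this body is an assumption, not the
# repo's exact implementation.
def transfer_gradient_from_player_to_shared(player, shared_model, gpu_id):
    for local_param, shared_param in zip(
        player.model.parameters(), shared_model.parameters()
    ):
        if not shared_param.requires_grad or local_param.grad is None:
            continue
        # Gradients computed on a worker's GPU are moved to CPU memory, where
        # the shared model lives, before the shared optimizer consumes them.
        shared_param._grad = (
            local_param.grad.cpu() if gpu_id >= 0 else local_param.grad
        )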