Example #1
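This method begins a new evaluation episode: it resets the per-episode counters and frame buffers, lazily loads the pre-generated test/val episode list for the current scene type from a pickle file, and then replays those stored episodes in order.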
    # Requires `import pickle`; `num_to_name` is a repo-local helper that maps
    # an AI2THOR scene number to its room-type name (see the sketch below).
    def new_episode(
        self,
        args,
        scenes,
        possible_targets=None,
        targets=None,
        keep_obj=False,
        glove=None,
    ):
        # Reset per-episode counters and frame buffers.
        self.done_count = 0
        self.duplicate_count = 0
        self.failed_action_count = 0
        self.prev_frame = None
        self.current_frame = None

        # Lazily load the pre-generated test/val episodes on the first call;
        # self.file doubles as the "already loaded" sentinel.
        if self.file is None:
            # Scene names look like "FloorPlan28" or "FloorPlan28_physics";
            # strip the prefix (and suffix) to recover the scene number.
            sample_scene = scenes[0]
            if "physics" in sample_scene:
                scene_num = sample_scene[len("FloorPlan"):-len("_physics")]
            else:
                scene_num = sample_scene[len("FloorPlan"):]
            scene_num = int(scene_num)
            scene_type = num_to_name(scene_num)
            task_type = args.test_or_val
            self.file = open(
                "test_val_split/" + scene_type + "_" + task_type + ".pkl",
                "rb")
            self.all_data = pickle.load(self.file)
            self.file.close()
            self.all_data_enumerator = 0

        # Replay the stored episodes in order.
        episode = self.all_data[self.all_data_enumerator]
        self.all_data_enumerator += 1
        self._new_episode(args, episode)
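Neither example includes num_to_name. As a point of reference only, here is a minimal sketch, assuming the standard AI2THOR convention that FloorPlan scenes are numbered in blocks of 100 per room type (1-30 kitchens, 201-230 living rooms, 301-330 bedrooms, 401-430 bathrooms); the room-type strings are hypothetical and would have to match the actual test_val_split/*.pkl file names.

# Hypothetical sketch of num_to_name, NOT the repo's actual helper.
def num_to_name(scene_num):
    # AI2THOR numbers FloorPlan scenes in blocks of 100 per room type.
    room_types = {0: "kitchen", 2: "living_room", 3: "bedroom", 4: "bathroom"}
    return room_types[scene_num // 100]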
Example #2
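This function is a single A3C worker process: it seeds itself, pins itself to a GPU, builds a local agent, and then loops until the shared end flag is set, repeatedly synchronizing with the shared model, rolling out episodes, and pushing gradients back through the shared optimizer.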
# Requires random, time, setproctitle, and torch, plus the repo's own helpers
# (AI2THOR_TARGET_CLASSES, ModelOptions, RandomNavigationAgent, new_episode,
# run_episode, compute_loss, reset_player, end_episode, num_to_name, and
# transfer_gradient_from_player_to_shared).
def a3c_train(
        rank,
        args,
        create_shared_model,
        shared_model,
        initialize_agent,
        optimizer,
        res_queue,
        end_flag,
        scenes,
):
    setproctitle.setproctitle('Training Agent: {}'.format(rank))

    targets = AI2THOR_TARGET_CLASSES[args.num_category]

    # Give each worker its own seed and round-robin it onto a GPU.
    random.seed(args.seed + rank)
    gpu_id = args.gpu_ids[rank % len(args.gpu_ids)]

    torch.manual_seed(args.seed + rank)
    if gpu_id >= 0:
        # Only touch CUDA when a GPU was assigned (gpu_id == -1 means CPU-only).
        torch.cuda.set_device(gpu_id)
        torch.cuda.manual_seed(args.seed + rank)

    player = initialize_agent(create_shared_model, args, rank, scenes, targets, gpu_id=gpu_id)
    # A random agent has no parameters to train, so skip backprop for it.
    compute_grad = not isinstance(player, RandomNavigationAgent)

    model_options = ModelOptions()

    episode_num = 0

    while not end_flag.value:

        total_reward = 0
        player.eps_len = 0
        player.episode.episode_times = episode_num
        new_episode(args, player)
        player_start_time = time.time()

        # A3C inner loop: pull the latest shared weights, roll out a segment,
        # compute the loss, and push local gradients to the shared model.
        while not player.done:
            player.sync_with_shared(shared_model)
            total_reward = run_episode(player, args, total_reward, model_options, True)
            loss = compute_loss(args, player, gpu_id, model_options)
            if compute_grad and loss['total_loss'] != 0:
                player.model.zero_grad()
                loss['total_loss'].backward()
                torch.nn.utils.clip_grad_norm_(player.model.parameters(), 100.0)
                transfer_gradient_from_player_to_shared(player, shared_model, gpu_id)
                optimizer.step()
            if not player.done:
                reset_player(player)

        # Convert loss tensors to plain Python floats for logging.
        for k in loss:
            loss[k] = loss[k].item()

        end_episode(
            player,
            res_queue,
            title=num_to_name(int(player.episode.scene[9:])),  # strip "FloorPlan"
            total_time=time.time() - player_start_time,
            total_reward=total_reward,
        )
        reset_player(player)

        # Cycle episode_num through the available scene types.
        episode_num = (episode_num + 1) % len(args.scene_types)

    player.exit()
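As written, a3c_train is meant to run as one of several worker processes, which is standard for A3C. Below is a minimal launcher sketch, assuming torch.multiprocessing and that args, create_shared_model, shared_model, initialize_agent, optimizer, and scenes have already been constructed as in the snippet above; the repo's actual entry point may differ.

# Hypothetical launcher, assuming the objects named above already exist.
import torch.multiprocessing as mp

if __name__ == "__main__":
    end_flag = mp.Value("b", False)  # workers poll this flag to know when to stop
    res_queue = mp.Queue()           # workers report per-episode results here
    processes = []
    for rank in range(args.workers):
        p = mp.Process(
            target=a3c_train,
            args=(rank, args, create_shared_model, shared_model,
                  initialize_agent, optimizer, res_queue, end_flag, scenes),
        )
        p.start()
        processes.append(p)
    # ... consume res_queue, then set end_flag.value = True and join the workers.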