def search(self,
           n_mcts,
           c,
           Env,
           mcts_env,
           budget,
           max_depth=200,
           fixed_depth=True):
        """Run MCTS traces from the root until the sample budget is spent.

        Builds the root from a deep copy of ``Env``, then repeatedly selects
        down the tree, expands at most one leaf per trace, and backs the
        discounted return up to the root.
        """
        env = copy.deepcopy(Env)
        self.create_root(env, budget)
        if self.root.terminal:
            raise ValueError("Can't do tree search from a terminal state")

        is_atari = is_atari_game(env)
        if is_atari:
            raise NotImplementedError

        while budget > 0:
            # Every trace restarts at the root with a fresh environment copy.
            state = self.root
            if is_atari:
                raise NotImplementedError
            mcts_env = copy.deepcopy(Env)
            # Reseed the rollout copy so traces are decorrelated.
            mcts_env.seed(np.random.randint(1e7))

            depth = 0
            terminal = False
            while not state.terminal:
                # Optionally shrink the exploration constant with depth.
                if self.depth_based_bias:
                    bias = c * self.gamma ** depth / (1 - self.gamma)
                else:
                    bias = c
                action = state.select(c=bias, variance=self.variance)
                depth += 1
                child = action.child_state
                if child is None:
                    # Expansion: attach a new child, then end this trace.
                    rollout_depth = max_depth if fixed_depth else max_depth - depth
                    state, budget = action.add_child_state(
                        mcts_env, budget, rollout_depth, depth=depth)
                    break
                # Selection: descend and sample the transition.
                state = child
                terminal, budget = state.sample(mcts_env, action.index, budget)
                if terminal:
                    break

            # Back-up: propagate the discounted return along the visited path.
            R = state.V
            state.update()
            while state.parent_action is not None:
                if terminal:
                    # Terminal reward replaces the bootstrap at the leaf only.
                    R = state.reward
                    terminal = False
                else:
                    R = state.reward + self.gamma * R
                action = state.parent_action
                action.update(R)
                state = action.parent_state
                state.update()
    def search(self, n_mcts, c, env, mcts_env, budget, max_depth=200, fixed_depth=True):
        """ Perform the MCTS search from the root.

        Multi-agent variant: rewards and returns are per-agent vectors and
        every state/action carries an "owner" agent index.
        """
        env = copy.deepcopy(env)
        self.create_root(env, budget)
        if self.root.terminal:
            raise (ValueError("Can't do tree search from a terminal state"))

        is_atari = is_atari_game(env)
        if is_atari:
            # Atari snapshot/restore is not supported in this variant.
            raise NotImplementedError
        while budget > 0:
            state = self.root  # reset to root for new trace
            if not is_atari:
                mcts_env = copy.deepcopy(env)  # copy original Env to rollout from
            else:
                raise NotImplementedError
            # Reseed the rollout copy so consecutive traces are decorrelated.
            mcts_env.seed(np.random.randint(1e7))
            st = 0  # depth counted in completed transitions, not tree edges
            terminal = False

            while not state.terminal:

                # Depth-based bias shrinks the exploration constant geometrically.
                bias = c * self.gamma ** st / (1 - self.gamma) if self.depth_based_bias else c
                action = state.select(c=bias, variance=self.variance)
                if action.child_state is not None:
                    parent_owner = state.owner
                    state = action.child_state  # select
                    terminal, budget = state.sample(mcts_env, action.index, budget, parent_owner)
                    if terminal:
                        break
                else:
                    rollout_depth = max_depth if fixed_depth else max_depth - st
                    state, budget = action.add_child_state(mcts_env, budget, rollout_depth, depth=st)  # expand
                    break

                # Depth only advances when the env reports a completed transition
                # (presumably: all agents have acted — TODO confirm against Env).
                if mcts_env.has_transitioned():
                    st += 1

                # If there are no more agent in the decision queue, a lap has been completed
                # and the ordering of the agents must be re-evaluated

            # Back-up

            # Per-agent return vector; bootstrap from V only at non-terminal leaves.
            R = np.zeros(mcts_env.agents_number)

            if not state.terminal:
                R = copy.deepcopy(state.V)

            state.update()
            agents_reward = copy.deepcopy(state.reward)
            while state.parent_action is not None:  # loop back-up until root is reached
                owner = state.parent_action.owner  # rewards are stored in the state following the action, which has different owner
                if not terminal:
                    if state.end_turn:
                        # Refresh the cached reward vector at each turn boundary.
                        agents_reward = copy.deepcopy(state.reward)
                    try:
                        R[owner] = agents_reward[owner] + self.gamma * R[owner]
                    except TypeError:
                        # NOTE(review): debug leftover — a TypeError here (e.g. a
                        # scalar reward where a vector is expected) is printed and
                        # swallowed, leaving R[owner] stale. Consider re-raising.
                        print("R:", R)
                        print("agents_reward:", agents_reward)
                else:
                    if state.terminal:
                        # Terminal leaf: its reward vector replaces the return.
                        R = copy.deepcopy(state.reward)
                    else: # ??? NOTE(review): unclear branch — `terminal` was reported
                        # by sample() but the state itself is not terminal; only the
                        # owner's component is overwritten. Verify intent.
                        R[owner] = state.reward[owner]
                    # Reset so ancestors above the leaf keep discounting.
                    terminal = False
                action = state.parent_action
                action.update(R[action.owner])
                state = action.parent_state
                state.update()
# Esempio n. 3 ("Example no. 3") — separator left by the code-example scraper;
# the stray "0" was the example's vote count, not code.
    def search(self,
               n_mcts,
               c,
               env,
               mcts_env,
               budget,
               max_depth=200,
               fixed_depth=True):
        """ Perform the MCTS search from the root.

        Particle-based multi-agent variant: each tree node stores particles
        (sampled trajectories of state/reward), and new particles are only
        injected while k = ceil(beta * n^alpha) exceeds the node's current
        particle count (progressive widening).
        """
        env = copy.deepcopy(env)
        self.create_root(env, budget)
        if self.root.terminal:
            raise (ValueError("Can't do tree search from a terminal state"))

        is_atari = is_atari_game(env)
        if is_atari:
            # Atari snapshot/restore is not supported in this variant.
            raise NotImplementedError
        while budget > 0:
            state = self.root  # reset to root for new trace
            if not is_atari:
                mcts_env = copy.deepcopy(
                    env)  # copy original Env to rollout from
            else:
                raise NotImplementedError
            # Reseed the rollout copy so consecutive traces are decorrelated.
            mcts_env.seed(np.random.randint(1e7))
            st = 0  # depth counted in completed transitions
            # Once a fresh particle is injected (flag=True), the rest of the
            # descent must resample from that particle to stay on-trajectory.
            flag = False
            source_particle = None

            while not state.terminal:

                # Depth-based bias shrinks the exploration constant geometrically.
                bias = c * self.gamma**st / (
                    1 - self.gamma) if self.depth_based_bias else c
                action = state.select(c=bias, variance=self.variance)

                # Progressive-widening threshold for this action's child.
                k = np.ceil(self.beta * action.n**self.alpha)
                if action.child_state is not None:
                    state = action.child_state  # select
                    add_particle = k >= state.get_n_particles()
                    if add_particle and not flag:
                        # First injection on this trace: sample a new particle
                        # from the parent state's distribution.
                        flag = True
                        source_particle, budget = action.sample_from_parent_state(
                            mcts_env, budget)
                        state.add_particle(source_particle)
                        if source_particle.terminal:
                            break
                    elif flag:
                        # Continue the already-started particle trajectory.
                        source_particle, budget = action.sample_from_particle(
                            source_particle, mcts_env, budget)
                        state.add_particle(source_particle)
                        if source_particle.terminal:
                            break
                    elif state.terminal:
                        # Reuse a stored terminal particle instead of stepping.
                        source_particle = np.random.choice(state.particles)
                        budget -= 1  # sample from the terminal states particles

                else:
                    rollout_depth = max_depth if fixed_depth else max_depth - st
                    state, budget, source_particle = action.add_child_state(
                        mcts_env,
                        budget,
                        max_depth=rollout_depth,
                        source_particle=source_particle,
                        depth=st)  # expand
                    break

                # Depth only advances on a completed transition (presumably a
                # full lap of all agents — TODO confirm against Env).
                if mcts_env.has_transitioned():
                    st += 1

            # Back-up

            # Per-agent return vector; bootstrap from V only at non-terminal leaves.
            R = np.zeros(env.agents_number)

            if not state.terminal:
                R = copy.deepcopy(state.V)

            state.update()
            # Walk the particle chain in lockstep with the tree path so each
            # level uses the reward actually sampled on this trace.
            # NOTE(review): assumes source_particle was set on every exit path
            # of the selection loop — verify it cannot still be None here.
            particle = source_particle
            agents_reward = copy.deepcopy(particle.reward)
            while state.parent_action is not None:  # loop back-up until root is reached
                owner = state.parent_action.owner  # rewards are stored in the state following the action, which has different owner
                r = particle.reward
                if state.end_turn:
                    # Refresh the cached reward vector at each turn boundary.
                    agents_reward = copy.deepcopy(r)
                if not particle.terminal:
                    R[owner] = agents_reward[owner] + self.gamma * R[owner]
                else:
                    # Terminal particle: its reward vector replaces the return.
                    R = copy.deepcopy(r)
                action = state.parent_action
                action.update(R[action.owner])
                state = action.parent_state
                state.update()
                particle = particle.parent_particle
    def search(self,
               n_mcts,
               c,
               Env,
               mcts_env,
               budget,
               max_depth=200,
               fixed_depth=True):
        """Perform the MCTS search from the root.

        Particle-widening variant: a child state accepts a new particle while
        k = ceil(beta * n^alpha) exceeds its current particle count; otherwise
        an existing particle is re-sampled for its reward.

        Args:
            n_mcts: number of traces (unused here; the loop is budget-driven).
            c: exploration constant for selection.
            Env: environment whose current state becomes the search root.
            mcts_env: scratch environment, re-copied from Env every trace.
            budget: total number of environment samples allowed.
            max_depth: rollout depth used when expanding a leaf.
            fixed_depth: if True the rollout depth is constant, otherwise it
                shrinks by the depth already reached in the tree.

        Raises:
            ValueError: if the root state is terminal.
            NotImplementedError: for Atari environments.
        """
        env = copy.deepcopy(Env)
        self.create_root(env, budget)
        if self.root.terminal:
            raise (ValueError("Can't do tree search from a terminal state"))

        is_atari = is_atari_game(env)
        if is_atari:
            raise NotImplementedError
        while budget > 0:
            state = self.root  # reset to root for new trace
            if not is_atari:
                mcts_env = copy.deepcopy(
                    Env)  # copy original Env to rollout from
            else:
                raise NotImplementedError
            # Reseed the rollout copy so consecutive traces are decorrelated.
            mcts_env.seed(np.random.randint(1e7))
            st = 0
            terminal = False
            while not state.terminal:
                # Depth-based bias shrinks the exploration constant geometrically.
                bias = c * self.gamma**st / (
                    1 - self.gamma) if self.depth_based_bias else c
                action = state.select(c=bias, variance=self.variance)
                st += 1
                # Progressive-widening threshold for this action's child.
                k = np.ceil(self.beta * action.n**self.alpha)
                if action.child_state is not None:
                    state = action.child_state  # select
                    add_particle = k >= state.get_n_particles()
                    if add_particle:
                        # Inject a fresh particle sampled from the parent state.
                        source_particle, budget = action.sample_from_parent_state(
                            mcts_env, budget)
                        state.add_particle(source_particle)
                        if source_particle.terminal:
                            terminal = True
                            break
                    else:
                        # Reuse a stored particle's reward instead of stepping.
                        particle = state.sample_reward()
                        if state.terminal or particle.terminal:
                            terminal = True
                            budget -= 1  # sample from the terminal states particles

                else:
                    rollout_depth = max_depth if fixed_depth else max_depth - st
                    state, budget, source_particle = action.add_child_state(
                        mcts_env, budget, max_depth=rollout_depth,
                        depth=st)  # expand
                    terminal = source_particle.terminal
                    break

            # Back-up
            R = state.V
            state.update()
            while state.parent_action is not None:  # loop back-up until root is reached
                r = state.reward
                if not terminal:
                    R = r + self.gamma * R
                else:
                    # Terminal reward replaces the bootstrapped value, but only
                    # at the leaf: reset the flag so ancestors keep discounting.
                    # (Bug fix — previously `terminal` stayed True, so every
                    # ancestor's return was overwritten with its own immediate
                    # reward, matching neither sibling variant of this method.)
                    R = r
                    terminal = False
                action = state.parent_action
                action.update(R)
                state = action.parent_state
                state.update()
    def search(self,
               n_mcts,
               c,
               Env,
               mcts_env,
               budget,
               max_depth=200,
               fixed_depth=True):
        """ Perform the MCTS search from the root.

        Root-particle variant: the root is (re)built from n_particles equal
        particles snapshotting Env's signature; when no sampler is configured,
        a deep-copied Env is kept per particle.
        """
        Envs = None
        if not self.sampler:
            # One environment copy per particle to roll out from.
            Envs = [copy.deepcopy(Env) for _ in range(self.n_particles)]

        if self.root is None:
            # initialize new root with many equal particles

            signature = Env.get_signature()

            # Optional extra info: only computed if this object exposes an
            # index_to_box converter (duck-typed feature probe).
            box = None
            to_box = getattr(self, "index_to_box", None)
            if callable(to_box):
                box = Env.index_to_box(signature["state"])

            particles = [
                Particle(state=signature,
                         seed=random.randint(0, 1e7),
                         reward=0,
                         terminal=False,
                         info=box) for _ in range(self.n_particles)
            ]
            self.root = State(parent_action=None,
                              na=self.na,
                              envs=Envs,
                              particles=particles,
                              sampler=self.sampler,
                              root=True,
                              budget=budget)
        else:
            self.root.parent_action = None  # continue from current root
            particles = self.root.particles

        if self.root.terminal:
            raise (ValueError("Can't do tree search from a terminal state"))

        is_atari = is_atari_game(Env)
        if is_atari:
            # NOTE(review): the snapshot line below is dead code — it follows
            # an unconditional raise.
            raise NotImplementedError
            snapshot = copy_atari_state(
                Env)  # for Atari: snapshot the root at the beginning

        while budget > 0:
            state = self.root  # reset to root for new trace
            if not is_atari:
                mcts_envs = None
                if not self.sampler:
                    mcts_envs = [
                        copy.deepcopy(Env) for i in range(self.n_particles)
                    ]  # copy original Env to rollout from
            else:
                # NOTE(review): restore line below is dead code after the raise.
                raise NotImplementedError
                restore_atari_state(mcts_env, snapshot)
            st = 0
            while not state.terminal:
                action = state.select(c=c)
                st += 1
                # s1, r, t, _ = mcts_env.step(action.index)
                # NOTE(review): sibling variants test `action.child_state is not
                # None`; hasattr is only equivalent if Action gains the attribute
                # strictly upon expansion — confirm against the Action class.
                if hasattr(action, 'child_state'):
                    state = action.child_state  # select
                    if state.terminal:
                        # Charge one budget unit per particle at a terminal node.
                        budget -= len(state.particles)
                    continue
                else:
                    rollout_depth = max_depth if fixed_depth else max_depth - st
                    state, budget = action.add_child_state(
                        state, mcts_envs, budget, self.sampler,
                        rollout_depth)  # expand
                    break

            # Back-up
            R = state.V
            state.update()
            while state.parent_action is not None:  # loop back-up until root is reached
                if not state.terminal:
                    R = state.r + self.gamma * R
                else:
                    # Terminal state: its own reward replaces the return.
                    R = state.r
                action = state.parent_action
                action.update(R)
                state = action.parent_state
                state.update()