Example #1
    def getMove(self):
        # Run MCTS from the current state on behalf of whichever team is
        # currently banning or picking.
        if self.isBlueBan() or self.isBluePick():
            return self.mcts(StateNode(parent=None, state=self),
                             teamIsBlue=True,
                             n=self.rolloutNumber)
        else:
            return self.mcts(StateNode(parent=None, state=self),
                             teamIsBlue=False,
                             n=self.rolloutNumber)
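The two branches above differ only in the teamIsBlue flag, so the call can also be written once; a sketch using only the names that already appear in the example:

    def getMove(self):
        # True when it is blue's turn to ban or pick, False for the other team.
        team_is_blue = self.isBlueBan() or self.isBluePick()
        return self.mcts(StateNode(parent=None, state=self),
                         teamIsBlue=team_is_blue,
                         n=self.rolloutNumber)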
Example #2
def run_experiment(intrinsic_motivation, gamma, c, mc_n, runs, steps):
    trajectories = []
    start = np.array([50, 50])
    true_belief = True

    for _ in range(runs):
        goal = draw_goal(start, 6)
        manual = draw_goal(start, 3)
        print("Goal: {}".format(goal))
        print("Manual: {}".format(manual))

        world = state.ToyWorld([100, 100], intrinsic_motivation, goal, manual)
        belief = None
        if true_belief:
            belief = dict(
                zip(
                    [
                        state.ToyWorldAction(np.array([0, 1])),
                        state.ToyWorldAction(np.array([0, -1])),
                        state.ToyWorldAction(np.array([1, 0])),
                        state.ToyWorldAction(np.array([-1, 0])),
                    ],
                    [[10, 10, 10, 10], [10, 10, 10, 10], [10, 10, 10, 10], [10, 10, 10, 10]],
                )
            )
        root_state = state.ToyWorldState(start, world, belief=belief)
        print(root_state.pos)
        next_state = StateNode(None, root_state, 0)
        trajectory = []
        for _ in range(steps):
            try:
                ba = mcts_search(next_state, gamma, c=c, n=mc_n)
                print("")
                print("=" * 80)
                print("State: {}".format(next_state.state))
                print("Belief: {}".format(next_state.state.belief))
                print("Reward: {}".format(next_state.reward))
                print("N: {}".format(next_state.n))
                print("Q: {}".format(next_state.q))
                print("Action: {}".format(ba.action))
                trajectory.append(next_state.state.pos)
                if (next_state.state.pos == np.array(goal)).all():
                    break
                # Step the real environment with the chosen action and re-root
                # the tree on the sampled successor state.
                next_state = next_state.children[ba].sample_state(real_world=True)
                next_state.parent = None
            except KeyboardInterrupt:
                break
        trajectories.append(trajectory)
        # pickle requires a binary file handle.
        with open(gen_name("trajectories", "pkl"), "wb") as f:
            pickle.dump(trajectories, f)
        print("=" * 80)
Example #3
def test_ucb1():
    ucb1 = tree_policies.UCB1(1)
    parent = StateNode(None, UCBTestState())
    an = parent.children[0]

    an.n = 1
    parent.n = 1
    assert ucb1(an) == 0

    an.n = 0
    parent.n = 1
    assert np.isnan(ucb1(an))

    an.n = 1
    parent.n = 0
    assert np.isnan(ucb1(an))

    an.q = 1
    an.n = 1
    parent.n = 1
    assert ucb1(an) == 1
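The assertions above pin down the behaviour at the edges: the score equals q when the logarithmic bonus vanishes (parent.n == 1), and it is NaN whenever either visit count is zero. A standalone helper consistent with those cases, assuming the common q + c * sqrt(2 * ln(parent visits) / child visits) form of UCB1 (the library's exact constant may differ):

import numpy as np

def ucb1_value(action_node, c=1.0):
    # Exploitation term plus exploration bonus; the result is NaN when either
    # visit count is zero, matching the expectations in test_ucb1 above.
    with np.errstate(divide="ignore", invalid="ignore"):
        bonus = c * np.sqrt(2 * np.log(action_node.parent.n) / action_node.n)
    return action_node.q + bonus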
Example #4
    def Search(self, players, deck, turn):
        # c = sqrt(2)
        tree_policy = tree_policies.UCB1(np.sqrt(2))
        # default_policy = default_policies.random_terminal_roll_out
        default_policy = default_policies.RandomKStepRollOut(50)
        backup = backups.monte_carlo

        current_state = self.set_current_state(deck)
        state = action_and_state.GOFishState(current_state)

        root_node = StateNode(None, state)

        mcts_run = mcts.MCTS(tree_policy, default_policy, backup)
        action = mcts_run(root_node, n=800)
        # The raw action is indexed as (player index, card); convert it into
        # the request dict expected by the game loop.
        action = {'requestedPlayer': players[action[0]], 'card': action[1]}
        return action
Example #5
        pos[movei[2],movei[3]] = pos[movei[0],movei[1]]
        pos[movei[0],movei[1]] = 0
    else:
        raise ValueError("error")

print(np.flipud(pos))



mcts = MCTS(tree_policy=Go(c=5), 
            default_policy=RandomKStepRollOut_Value(20, 0.95),
            backup=monte_carlo)

# Policy, rollout and value function approximators passed into the chess state.
policy_fun = policy_nn()
rollout_fun = rollout_nn()
value_fun = value_nn()

root = StateNode(None, ChessState(pos, 1, policy_fun, rollout_fun, value_fun, False))
best_action = mcts(root, n=500)

pr.disable()
s = io.StringIO()
sortby = 'cumulative'
ps = pstats.Stats(pr, stream=s).sort_stats(sortby)
ps.print_stats()
print(s.getvalue())

print(best_action.action)


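The profiling calls at the end of this example assume a profiler object named pr that was created and enabled earlier in the (truncated) script; a standard cProfile setup consistent with those calls would be:

import cProfile
import io
import pstats

# Profiler assumed by the pr.disable() / pstats block above (a sketch; the
# original setup is not shown in the snippet).
pr = cProfile.Profile()
pr.enable()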
Example #6
File: paintingtest.py  Project: silky/CrIB
def run_experiment(intrinsic_motivation, gamma, c, mc_n, runs, steps, problem):
    st1 = time.time()
    # trajectories = []
    start = np.array([50, 50])
    true_belief = True

    mcts_search = MCTS(tree_policy=UCB1(c=1.41),
                       default_policy=immediate_reward,
                       backup=monte_carlo)

    rewards = []
    for r in range(runs):
        sta = time.time()
        print("RUN number", r)
        goal = draw_goal(start, 6)
        # manual = draw_goal(start, 3)
        # print("Goal: {}".format(goal))

        world = PaintingWorld((100, 100), False, (100, 100), problem)
        belief = None
        root_state = PaintingWorldState((0, 0), (1, 1, 1), world)
        if true_belief:
            belief = {}
            for action in root_state.actions:
                belief[action] = [1] * len(root_state.actions)
            root_state.belief = belief
        # print(root_state.pos)
        next_state = StateNode(None, root_state)
        # trajectory =[]
        rew = 0
        for step in range(steps):
            st = time.time()
            ba = mcts_search(next_state, n=mc_n)
            # print("=" * 80)
            # print("State: {}".format(next_state.state))
            # # print("Belief: {}".format(next_state.state.belief))
            # print("Reward: {}".format(next_state.reward))
            # print("N: {}".format(next_state.n))
            # print("Q: {}".format(next_state.q))
            # print("Action: {}".format(ba.action))
            # trajectory.append(next_state.state.pos)
            rew = next_state.reward
            if (next_state.state.pos == np.array(goal)).all():
                break
            # Step the real environment with the chosen action and re-root the
            # tree on the sampled successor state.
            next_state = next_state.children[ba].sample_state(real_world=True)
            next_state.parent = None

            en = time.time()
            print("step", step, "time elapsed", en - st)

            if step >= 5 and rew > 0.5:
                break

            # except KeyboardInterrupt:
            #     break
        # trajectories.append(trajectory)
        # print (next_state.reward)
        rewards.append(rew)

        # with open(gen_name("trajectories", "pkl"), "w") as f:
        #     pickle.dump(trajectories, f)
        # print("=" * 80)
        end = time.time()
        print("run", r, "time elapsed", end - sta)
        # if rewards[-1] > 0:
        #     break
    w = max(rewards)
    print("REWARD", w)
    end1 = time.time()
    print("problem time elapsed", end1 - st1)
    return w

print("Video Sizes: %r" % (problem.videoSizes,))
print("Endpoints:\n\t%s" % ("\n\t".join([str(e) for e in problem.endpoints])))
print("Requests: %r" % ([r for r in problem.requests]))

# Generate initial state
initial_contents = [(0, []) for _ in range(problem.nCaches)]
initial_score = 0
initial_state = TreeState(caches_contents=initial_contents, score=initial_score,
                          problem=problem)

# Generate the optimal end state
mcts = MCTS(tree_policy=UCB1(c=1.41),
            default_policy=immediate_reward,
            backup=monte_carlo)

node = StateNode(parent=None, state=initial_state)

while True:
    if node.state.is_terminal():
        print("Terminal node reached.")
        break
    print("Finding best action")
    best_action = mcts(node)
    print("Performing action")
    node = StateNode(parent=None, state=node.state.perform(best_action))
    print("Score now is: %d" % node.state.score)

    print("Saving output")
    print(node.state.caches_contents)

    contents = node.state.caches_contents