Example #1
File: game.py Project: loan181/dojo-maze
    def startGame(self):
        self._stepN = 0
        self._action_point = 0
        self._won = False
        if self._view is not None:
            self._view.draw(self._stepN, self._action_point)

        for self._stepN in range(1, 1000):
            step(self)  # user-defined callback, invoked once per turn

            if self._debug:
                if self._view is not None:
                    self._view.draw(self._stepN, self._action_point)
                self.printMaze()

            if self._maze.isPlayerOnGoal():
                self._won = True
                break

        if self._debug:
            if self._won:
                print("Bravo! (", self._action_point, "actions )")
            else:
                print("Echec! Tu as dépassé le nombre d'actions permises")
        return (self._won, self._action_point, self._stepN)
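
The loop above hands control to a user-supplied `step(self)` callback once per turn. A minimal sketch of such a callback, with a purely hypothetical movement helper (the real dojo-maze API may expose different methods):

def step(game):
    # Naive strategy: always try to move right and count the move.
    # `movePlayerRight` is an assumed helper, not the project's API;
    # `_action_point` and `_maze` come from the snippet above.
    game._action_point += 1
    game._maze.movePlayerRight()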
Example #2
    def _twisted_stop(self):
        """
        Stop the Twisted reactor and wait until it has finished.
        """
        if self._twisted_stopped:
            return
        self.stop()
        while not self._twisted_stopped:
            main.step()  # pump the event loop until shutdown completes
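
One plausible way the `_twisted_stopped` flag gets set, sketched with Twisted's real shutdown hook `reactor.addSystemEventTrigger`; the wrapper class and its `stop()` method are assumptions, not the original code:

from twisted.internet import reactor

class ReactorRunner:
    def __init__(self):
        self._twisted_stopped = False
        # flip the flag only after the reactor has fully shut down
        reactor.addSystemEventTrigger(
            'after', 'shutdown', self._mark_stopped)

    def _mark_stopped(self):
        self._twisted_stopped = True

    def stop(self):
        if reactor.running:
            reactor.stop()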
Example #3
def evaluate_agent(agent, env, logger):
    global skip_frame_rate
    for i in range(30):
        obs = preprocess_observation(env.reset(), img_size)
        # a single preprocessed frame seeds the episode
        # (np.maximum(obs, obs) was a no-op left from two-frame max-pooling)
        state = obs
        # start the episode with the same frame stacked four times
        current_state = np.asarray([[state, state, state, state]])
        reward = 0
        # agent actions
        done = False
        while not done:
            action = agent.get_action(current_state)
            logger.add_agent_action(action)
            obs, r, done, info = step(env, action, agent)

            # slide the frame stack: drop the oldest frame, append the newest
            next_state = np.asarray([[
                current_state[0][1], current_state[0][2], current_state[0][3],
                obs
            ]])

            current_state = next_state

            reward += r
        logger.add_reward(reward)
        logger.save_agent_action()
    logger.save_agent_action_avg_std()
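
Examples #3 and #4 call a `step(env, action, agent)` wrapper instead of `env.step` directly; the `skip_frame_rate` global suggests action repetition. A sketch under that assumption (the project's actual wrapper may differ):

def step(env, action, agent):
    # Repeat the chosen action for skip_frame_rate frames, accumulating
    # reward, then return the last frame preprocessed.
    # `agent` is accepted but unused in this sketch.
    total_reward = 0.0
    obs, done, info = None, False, {}
    for _ in range(skip_frame_rate):
        obs, r, done, info = env.step(action)
        total_reward += r
        if done:
            break
    return preprocess_observation(obs, img_size), total_reward, done, info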
Example #4
def run_agent(agent, env):
    global skip_frame_rate
    for i in range(100):
        obs = preprocess_observation(env.reset(), img_size)
        state = obs  # np.maximum(obs, obs) was a no-op; one frame suffices
        # start the episode with the same frame stacked four times
        current_state = np.asarray([[state, state, state, state]])

        reward = 0
        done = False
        while not done:
            action = agent.get_action(current_state)

            obs, r, done, info = step(env, action, agent)

            # slide the frame stack: drop the oldest frame, append the newest
            next_state = np.asarray([[
                current_state[0][1], current_state[0][2], current_state[0][3],
                obs
            ]])

            current_state = next_state
            reward += r

        print(reward)
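
`preprocess_observation` itself is not shown in these snippets. A typical implementation for RGB image observations, assuming OpenCV (the real helper may differ):

import cv2
import numpy as np

def preprocess_observation(obs, img_size):
    # Grayscale, then downscale to img_size x img_size.
    gray = cv2.cvtColor(obs, cv2.COLOR_RGB2GRAY)
    return cv2.resize(gray, (img_size, img_size)).astype(np.uint8)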
Example #5
def test_step_moons():
    input = """<x=-1, y=0, z=2>
<x=2, y=-10, z=-7>
<x=4, y=-8, z=8>
<x=3, y=5, z=-1>"""
    moons = parse_input(raw)
    step(moons)
    assert moons[0].position == [2, -1, 1]
    assert moons[1].position == [3, -7, -4]
    assert moons[2].position == [1, -7, 5]
    assert moons[3].position == [2, 2, 0]

    assert moons[0].velocity == [3, -1, -1]
    assert moons[1].velocity == [1, 3, 3]
    assert moons[2].velocity == [-3, 1, -3]
    assert moons[3].velocity == [-1, -3, 1]
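
A `step(moons)` consistent with the assertions above (the classic pairwise-gravity update; `Moon` is assumed to hold 3-element `position` and `velocity` lists):

from itertools import combinations

def step(moons):
    # Gravity: on each axis, each pair's velocities pull one unit
    # toward each other.
    for a, b in combinations(moons, 2):
        for axis in range(3):
            if a.position[axis] < b.position[axis]:
                a.velocity[axis] += 1
                b.velocity[axis] -= 1
            elif a.position[axis] > b.position[axis]:
                a.velocity[axis] -= 1
                b.velocity[axis] += 1
    # Velocity: then every moon moves by its velocity.
    for moon in moons:
        for axis in range(3):
            moon.position[axis] += moon.velocity[axis]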
Example #6
    def test_v1_part1(self):
        from main import step, parseInput

        fish = parseInput(sample1)
        for _ in range(80):
            fish = step(fish)

        self.assertEqual(len(fish), 5934)
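
A `step(fish)` consistent with this test (one day of the lanternfish simulation; `parseInput` is assumed to return a list of integer timers):

def step(fish):
    # A timer at 0 resets to 6 and spawns a new fish with timer 8;
    # all other timers tick down by one.
    next_fish = []
    spawned = 0
    for timer in fish:
        if timer == 0:
            next_fish.append(6)
            spawned += 1
        else:
            next_fish.append(timer - 1)
    return next_fish + [8] * spawned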
Example #7
def test_step(digit, permutation, answer):
    assert answer == main.step(digit, permutation)
Example #8
    while True:
        # epsilon decays with the number of visits to this state
        visited = visited_count.get(state, 0)
        epsilon = n0 / (n0 + visited)

        # count the state visit
        visited_count[state] = visited + 1

        # choose action epsilon-greedily
        action = e_greedy(q, state, epsilon)

        # execute action, then credit the reward to every state-action
        # pair visited so far in this episode
        reward, next_state = step(state, action)
        curr_g.setdefault((state, action), 0)
        for key in curr_g:
            curr_g[key] += reward

        if next_state is None:
            break
        state = next_state

    # update value function
    for key in curr_g:
        # count the state-action visit
        if key in av_count:
            av_count[key] += 1
        else:
            av_count[key] = 1
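
The `e_greedy` helper used above is not shown. A sketch that matches how `q` is keyed in this loop, i.e. by (state, action) pairs; the action set is an assumption:

import random

def e_greedy(q, state, epsilon, actions=(0, 1)):
    # With probability epsilon explore uniformly; otherwise pick the
    # action with the highest estimated value for this state.
    if random.random() < epsilon:
        return random.choice(actions)
    return max(actions, key=lambda a: q.get((state, a), 0.0))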
Example #9
    fig.savefig(lossfname)
    plt.close()


def plot_scatter(model, in_, out_, scatterfname):
    pred = model(in_)
    fig, ax = plt.subplots()
    ax.scatter(pred.data.numpy(), out_.data.numpy())
    fig.savefig(scatterfname)
    plt.close()


for N in [50, 500, 1000, 5000]:
    for dx in [1, 10, 100, 1000, 10000, 100000]:
        t0 = time.time()
        x, y, w = main.init_data(N, dx, dy)
        m, o = main.instantiate_model(dx, dy)
        for i in range(n_iterations):
            m, o, loss = main.step(x, y, m, o)
        t1 = time.time()

        # plotting (plot_loss / plot_scatter) is left disabled so the
        # loop measures training time only
        print("{} {} {:.2f}".format(N, dx, t1 - t0))
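
A plausible `main.step` for this benchmark, matching the (x, y, m, o) -> (m, o, loss) signature used in the loop and the PyTorch-style tensors implied by `.data.numpy()` above (a sketch, not the original implementation):

import torch.nn.functional as F

def step(x, y, model, optimizer):
    # One full-batch gradient step; returns the scalar loss.
    optimizer.zero_grad()
    loss = F.mse_loss(model(x), y)
    loss.backward()
    optimizer.step()
    return model, optimizer, loss.item()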