Example #1
from ple import PLE

# flappy, preprocessor and ApproxQAgent are defined elsewhere in the source;
# this excerpt starts at the feature definitions.
features = [
    # Each raw feature is split per action: y is the binary action indicator,
    # so the f * y copy is active for one action and f * (1 - y) for the other.
    lambda x, y: (x[5] - x[7]) * (1 - y),  # pipe bottom minus player y
    lambda x, y: x[6] * y,                 # player velocity
    lambda x, y: x[6] * (1 - y),
    lambda x, y: x[4] * y,                 # pipe distance
    lambda x, y: x[4] * (1 - y),
    # lambda x, y: (x[3] - x[7]) / (x[6] ) * (1 - y) + ,
]
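
# A sketch of how feature functions like these are typically consumed by a
# linear Q-approximator (hypothetical helper; ApproxQAgent's real internals
# are not shown in this excerpt): Q(s, a) is a weighted sum of the features.
def linear_q(weights, feature_fns, state, action):
    return sum(w * f(state, action) for w, f in zip(weights, feature_fns))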

STEPS_PER_EPOCHS = 1000
EPOCHS = 60
EPSILON_START = 0.01
EPSILON_DECAY = EPOCHS * STEPS_PER_EPOCHS  # decay over the whole training run
EPSILON_MIN = 0.0
EPSILON_DECAY_V = (EPSILON_MIN - EPSILON_START) / EPSILON_DECAY  # per-step delta (negative)

game = flappy.FlappyClone()
env = PLE(game,
          display_screen=True,
          force_fps=True,
          fps=30,
          state_preprocessor=preprocessor)
env.init()
approxQAgent = ApproxQAgent(env.getActionSet(),
                            env.getGameStateDims(),
                            features,
                            learningRate=0.002)

reward = 0.
epsilon = EPSILON_START
for e in range(EPOCHS):
    avgloss = 0.
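    # The rest of the epoch loop is cut off in this excerpt. A minimal sketch
    # of epsilon-greedy training steps, assuming hypothetical ApproxQAgent
    # methods getAction(state, epsilon) and update(...) returning a TD loss:
    for _ in range(STEPS_PER_EPOCHS):
        if env.game_over():
            env.reset_game()
        state = env.getGameState()
        action = approxQAgent.getAction(state, epsilon)        # hypothetical API
        reward = env.act(action)
        avgloss += approxQAgent.update(state, action, reward,  # hypothetical API
                                       env.getGameState())
        epsilon = max(EPSILON_MIN, epsilon + EPSILON_DECAY_V)  # linear decay
    avgloss /= STEPS_PER_EPOCHS
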
Example #2
import logging
import sys

import numpy as np

# flappy, process_args and PLE_env come from the surrounding project and are
# not part of this excerpt.


class Defaults:
    # DQN defaults (excerpt; only some of the hyperparameters are shown)
    FREEZE_INTERVAL = 1000
    LEARNING_RATE = 0.005
    LEARNING_RATE_DECAY = 0.99
    MOMENTUM = 0.95
    REPLAY_MEMORY_SIZE = 500000
    RMS_DECAY = 0.3
    RMS_EPSILON = 0.0001
    UPDATE_FREQUENCY = 2
    UPDATE_RULE = 'rmsprop'

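# A sketch of the update these constants parameterize: standard RMSProp with
# momentum, inferred from UPDATE_RULE = 'rmsprop' (hypothetical helper; the
# trainer's own implementation is not part of this excerpt). LEARNING_RATE is
# presumably multiplied by LEARNING_RATE_DECAY between epochs, and the target
# network refreshed every FREEZE_INTERVAL updates.
def rmsprop_step(w, grad, g2, velocity, lr):
    g2 = Defaults.RMS_DECAY * g2 + (1.0 - Defaults.RMS_DECAY) * grad ** 2
    velocity = (Defaults.MOMENTUM * velocity
                - lr * grad / np.sqrt(g2 + Defaults.RMS_EPSILON))
    return w + velocity, g2, velocity
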
if __name__ == "__main__":
    test = False  # when False, force_fps below lets the game run at full speed
    game = flappy.FlappyClone(black=True)
    # game = Catcher(width=64, height=64)
    logging.basicConfig(level=logging.INFO)

    # --- Parse parameters ---
    parameters = process_args(sys.argv[1:], Defaults)
    if parameters.deterministic:
        rng = np.random.RandomState(523456)
    else:
        rng = np.random.RandomState()

    # --- Instantiate environment ---
    env = PLE_env(rng, game=game, frame_skip=parameters.frame_skip,
                  ple_options={"display_screen": True,
                               "force_fps": not test,
                               "fps": 30,
                               "rng": rng})
Example #3
import numpy as np

from ple import PLE

# flappy and the names used below (player_y, next_pipe_dist_to_player,
# PIPE_DIST_DELTA, V_MARGIN, DELTA_H, flap, selectedPipeBotY, ...) are defined
# earlier in the source and omitted from this excerpt.


class ExpertAgent(object):
    # Hand-coded baseline policy (excerpt: __init__, which stores the action
    # set as self.possibleActions, and the code that selects the pipe and the
    # default action are omitted).

    def getAction(self, state):
        # Pick a target height relative to the bottom pipe; the margin used
        # depends on how close the next pipe is.
        if state[next_pipe_dist_to_player] < PIPE_DIST_DELTA:
            targetH = selectedPipeBotY - V_MARGIN
        else:
            targetH = selectedPipeBotY - DELTA_H

        # pygame's y axis grows downward: flap once the player falls below the
        # target height.
        if state[player_y] > targetH:
            action = flap

        return self.possibleActions[action]


def preprocessor(state):
    return np.array([state[k] for k in sorted(state.keys())])
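
# What the preprocessor produces on a toy state (made-up values; FlappyClone's
# real state keys are not listed in this excerpt). Sorting the keys gives each
# feature a stable index across frames:
toy_state = {"player_y": 120.0, "player_vel": -4.0}
assert list(preprocessor(toy_state)) == [-4.0, 120.0]  # "player_vel" sorts first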


game = flappy.FlappyClone(crazy=False)
env = PLE(game, display_screen=True, force_fps=True, fps=30,
          state_preprocessor=preprocessor)
env.init()
expertAgent = ExpertAgent(env.getActionSet(), env.getGameStateDims())

for e in range(1, 101):
    while True:
        if env.game_over(): # if the game is over, reset
            print("test {}, death at score: {}".format(e, game.getScore()))
            env.game.tick(1. / 2.)  # very slow tick: linger on the game-over frame
            env.reset_game()
            break
        reward = env.act(expertAgent.getAction(env.getGameState()))
        print("score={:010.1f}".format(game.getScore()), end="\r")
        env.game.tick(FPS)  # FPS is defined earlier in the source, outside this excerpt
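
# To benchmark the expert without rendering, construct PLE with
# display_screen=False; force_fps=True above already runs the game at full
# speed instead of real time.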