lambda x, y: (x[5] - x[7]) * (1 - y), # bot - playery lambda x, y: x[6] * y, # velocity lambda x, y: x[6] * (1 - y), lambda x, y: x[4] * y, # pipe distance lambda x, y: x[4] * (1 - y), # lambda x, y: (x[3] - x[7]) / (x[6] ) * (1 - y) + , ] STEPS_PER_EPOCHS = 1000 EPOCHS = 60 EPSILON_START = 0.01 EPSILON_DECAY = EPOCHS * STEPS_PER_EPOCHS EPSILON_MIN = 0.00000 EPSILON_DECAY_V = (EPSILON_MIN - EPSILON_START) / EPSILON_DECAY game = flappy.FlappyClone() env = PLE(game, display_screen=True, force_fps=True, fps=30, state_preprocessor=preprocessor) env.init() approxQAgent = ApproxQAgent(env.getActionSet(), env.getGameStateDims(), features, learningRate=.002) reward = 0. epsilon = EPSILON_START for e in range(EPOCHS): avgloss = 0.
# NOTE(review): the UPPER_CASE settings below appear to be the tail of a
# `Defaults` class body (that name is passed to process_args further down);
# the enclosing class header lies outside this chunk and indentation was lost.
FREEZE_INTERVAL = 1000
LEARNING_RATE = 0.005
LEARNING_RATE_DECAY = 0.99
MOMENTUM = 0.95
REPLAY_MEMORY_SIZE = 500000
RMS_DECAY = 0.3
RMS_EPSILON = 0.0001
UPDATE_FREQUENCY = 2
UPDATE_RULE = 'rmsprop'


if __name__ == "__main__":
    test = False
    game = flappy.FlappyClone(black=True)
    # game = Catcher(width=64, height=64)
    logging.basicConfig(level=logging.INFO)

    # --- Parse parameters ---
    parameters = process_args(sys.argv[1:], Defaults)
    if parameters.deterministic:
        # Fixed seed so deterministic runs are reproducible.
        rng = np.random.RandomState(523456)
    else:
        rng = np.random.RandomState()

    # --- Instantiate environment ---
    # force_fps is disabled in test mode so the game runs at real-time speed.
    env = PLE_env(rng, game=game,
                  frame_skip=parameters.frame_skip,
                  ple_options={"display_screen": True,
                               "force_fps": not test,
                               "fps": 30,
                               "rng": rng})
if state[next_pipe_dist_to_player] < PIPE_DIST_DELTA: targetH = selectedPipeBotY - V_MARGIN else: targetH = selectedPipeBotY - DELTA_H if (state[player_y] > targetH): action = flap return self.possibleActions[action] def preprocessor(state): return np.array([state[k] for k in sorted(state.keys())]) game = flappy.FlappyClone(crazy=False) env = PLE(game, display_screen=True, force_fps=True, fps=30, state_preprocessor=preprocessor) env.init() expertAgent = ExpertAgent(env.getActionSet(), env.getGameStateDims()) for e in range(1, 101): while True: if env.game_over(): # if the game is over, reset print("test {}, death at score: {}".format(e, game.getScore())) env.game.tick(1. / 2.) env.reset_game() break reward = env.act(expertAgent.getAction(env.getGameState())) print("score={:010.1f}".format(game.getScore()), end="\r") env.game.tick(FPS)