Example #1
game = FlappyBird()
p = PLE(game,
        fps=30,
        display_screen=True,
        force_fps=False,
        state_preprocessor=process_state)
p.init()
game.ple = p

#random number identifying this run, so we avoid overwriting files
#that were created with other parameters
number_experiment = randint(0, 10000000)

#agent
action_set = p.getActionSet()
agent = QLearnerEvolverFlappy(len(action_set), p.getGameStateDims()[1])
agent.should_epsilon_decay = False  #to control the decay differently
# agent.load("flappy1_100.h5")

nb_games = 1  #game counter
nb_frames = 0  #frame counter
score_game = 0  #score of the current game

#to average last losses and scores
last_losses = deque(maxlen=1000)
last_500_games_score = deque(maxlen=500)

#flags so we write to files only once every x games
flag_game_10 = False
flag_game_100 = False
flag_game_500 = False
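
# Sketch (assumption, not part of the original excerpt): the counters and deques above
# are usually advanced by a loop like this; agent.pick_action() is a placeholder name,
# since QLearnerEvolverFlappy's real interface is not shown in the excerpt.
while nb_games <= 1000:                             # hypothetical game budget
    if p.game_over():
        last_500_games_score.append(score_game)
        score_game = 0
        nb_games += 1
        p.reset_game()
    state = p.getGameState()                        # already flattened by process_state
    action = action_set[agent.pick_action(state)]   # placeholder agent call
    score_game += p.act(action)
    nb_frames += 1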

Example #2

#state preprocessor: flatten the game-state dict into a single-row numpy array
def process_state(state):
    return np.array([list(state.values())])


game = FlappyBird()
p = PLE(game,
        fps=30,
        display_screen=True,
        force_fps=False,
        state_preprocessor=process_state)
p.init()
game.ple = p

#agent
action_set = p.getActionSet()
agent = PolicyNetwork(len(action_set), p.getGameStateDims()[1])

#some flags and variables
nb_games = 1
nb_frames = 0
last_losses = deque(maxlen=1000)
flag_game_10 = False
flag_game_100 = False
flag_game_500 = False
score_game = 0
last_500_games_score = deque(maxlen=500)

#variables controlling the epsilon decay
EXPLORE = 300000  #small is 300000, big is 5000000
FINAL_EPSILON = 0.0001
INITIAL_EPSILON = 0.8
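
# Sketch (assumption, not in the original excerpt): with the three constants above,
# epsilon is typically annealed linearly from INITIAL_EPSILON down to FINAL_EPSILON
# over EXPLORE frames, one step per frame.
def anneal_epsilon(epsilon):
    """One linear annealing step of the schedule implied by the constants above."""
    if epsilon > FINAL_EPSILON:
        epsilon -= (INITIAL_EPSILON - FINAL_EPSILON) / EXPLORE
    return epsilon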
Example #3
STEPS_PER_EPOCHS = 1000
EPOCHS = 60
EPSILON_START = 0.01
EPSILON_DECAY = EPOCHS * STEPS_PER_EPOCHS
EPSILON_MIN = 0.0
EPSILON_DECAY_V = (EPSILON_MIN - EPSILON_START) / EPSILON_DECAY

game = flappy.FlappyClone()
env = PLE(game,
          display_screen=True,
          force_fps=True,
          fps=30,
          state_preprocessor=preprocessor)
env.init()
approxQAgent = ApproxQAgent(env.getActionSet(),
                            env.getGameStateDims(),
                            features,
                            learningRate=.002)

reward = 0.
epsilon = EPSILON_START
for e in range(EPOCHS):
    avgloss = 0.
    avgreward = 0.
    for s in range(STEPS_PER_EPOCHS):
        if env.game_over():  # if the game is over, reset
            # print("tick {} death at score: {}".format(e * STEPS_PER_EPOCHS + s, game.getScore()))
            env.reset_game()
            obs = env.getGameState()
            action = approxQAgent.getAction(obs, epsilon)
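
# Sketch (assumption, not in the original excerpt): EPSILON_DECAY_V above is a negative
# per-step increment, so the schedule is usually applied once per inner-loop step like this.
def next_epsilon(epsilon):
    """Decay epsilon linearly by EPSILON_DECAY_V per step, clamped at EPSILON_MIN."""
    return max(EPSILON_MIN, epsilon + EPSILON_DECAY_V)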

Example #4
game = FlappyBird()
p = PLE(game,
        fps=30,
        display_screen=True,
        force_fps=False,
        state_preprocessor=process_state)
p.init()
game.ple = p

#print(p.getActionSet())

#agent
action_set = p.getActionSet()
agent = RandomSearch(len(action_set), p.getGameStateDims()[1])

# agent.load("flappy1_100.h5")

nb_games = 1
nb_frames = 0
flag_game_10 = False
flag_game_100 = False
flag_game_50 = False
score_game = 0

last_50_games_score = deque(maxlen=50)

EXPLORE = 5000000  #small is 300000, big is 5000000
FINAL_EPSILON = 0.0001
INITIAL_EPSILON = 0.1
Example #5
class MyEnv(Environment):
    VALIDATION_MODE = 0
    # original screen size is 288x512

    def __init__(self, rng, game=None, frame_skip=4,
            ple_options={"display_screen": True, "force_fps":True, "fps":30}):

        self._mode = -1
        self._mode_score = 0.0
        self._mode_episode_count = 0

        self._frame_skip = frame_skip if frame_skip >= 1 else 1
        self._random_state = rng
        self._hist_size = 1

        if game is None:
            raise ValueError("Game must be provided")


        self._ple = PLE(game, **ple_options)
        self._ple.init()

        self._actions = self._ple.getActionSet()
        self._state_size = self._ple.getGameStateDims()[0]
        self._state_saved = np.zeros((self._state_size), dtype=np.float32)
        self.previous_score = 0.
        self.episode_scores = []


    def reset(self, mode):
        if mode == MyEnv.VALIDATION_MODE:
            if self._mode != MyEnv.VALIDATION_MODE:
                self._mode = MyEnv.VALIDATION_MODE
                self._mode_score = 0.0
                self.episode_scores = []
                self.previous_score = .0
                self._mode_episode_count = 0
            else:
                self._mode_episode_count += 1
                self.episode_scores.append(self._mode_score - self.previous_score)
                self.previous_score = self._mode_score
        elif self._mode != -1:  # mode is not VALIDATION_MODE here, so switch back to training (-1)
            self._mode = -1

        # print("Dead at score {}".format(self._ple.game.getScore()))
        self._ple.reset_game()
        for _ in range(self._random_state.randint(self._hist_size)):
             self._ple.act(self._ple.NOOP)

        return [[[0] * self._state_size] * self._hist_size]


    def act(self, action):
        action = self._actions[action]

        reward = 0
        for _ in range(self._frame_skip):
            reward += self._ple.act(action)

            if self.inTerminalState():
                break

        self._state_saved = self._ple.getGameState()
        self._mode_score += reward
        if self.inTerminalState():
            pass

        return reward #np.sign(reward)

    def summarizePerformance(self, test_data_set):
        if not self.inTerminalState():
            self._mode_episode_count += 1
        maxscore = max(self.episode_scores) if len(self.episode_scores) else "N/A"
        print("== Max score of episode is {} over {} episodes ==".format(
            maxscore, self._mode_episode_count))


    def inputDimensions(self):
        return [(self._hist_size, self._state_size)]

    def observationType(self, subject):
        return np.float32

    def nActions(self):
        return len(self._actions)

    def observe(self):
        return [np.array(self._state_saved)]

    def inTerminalState(self):
        return self._ple.game_over()
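
# Sketch (assumed usage, not in the original excerpt): driving MyEnv directly with a random
# policy; assumes numpy as np and `from ple.games.flappybird import FlappyBird` are available.
rng = np.random.RandomState(0)
env = MyEnv(rng, game=FlappyBird(), frame_skip=4,
            ple_options={"display_screen": False, "force_fps": True, "fps": 30})
env.reset(-1)                                    # -1 selects the training (non-validation) mode
while not env.inTerminalState():
    env.act(rng.randint(env.nActions()))         # random action index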
Example #6
class PygameLearningEnvironment(Environment):

    def __init__(self, game_name, rewards, state_as_image = True, fps = 30, force_fps=True, frame_skip=2,
                 hold_action=2, visualize=False, width=84, height=84, lives=1):
        """
        Initialize Pygame Learning Environment
        https://github.com/ntasfi/PyGame-Learning-Environment

        Args:
            env_name: PLE environment

            fps: frames per second
            force_fps: False for slower speeds
            frame_skip: number of env frames to skip
            hold_action: number of env frames to hold each action for
            isRGB: get color or greyscale version of statespace #isRGB = False,
            game_height,game_width: height and width of environment
            visualize: If set True, the program will visualize the trainings, will slow down training
            lives: number of lives in game. Game resets on game over (ie lives = 0). only in Catcher and Pong (score)

        """

        self.env_name = game_name
        self.rewards = rewards
        self.lives = lives
        self.state_as_image = state_as_image
        self.fps = fps #30  # frames per second
        self.force_fps = force_fps #True  # False for slower speeds
        self.frame_skip = frame_skip  # frames to skip
        self.ple_num_steps = hold_action  # frames to continue action for
        #self.isRGB = isRGB  #always returns color; let tensorforce do the processing
        self.visualize = visualize
        self.width = width
        self.height = height
        #testing
        self.reached_terminal = 0
        self.episode_time_steps = 0
        self.episode_reward = 0
        self.total_time_steps = 0

        if self.env_name == 'catcher':
            self.game = Catcher(width=self.width, height=self.height,init_lives=self.lives)
        elif self.env_name == 'pixelcopter':
            self.game = Pixelcopter(width=self.width, height=self.height)
        elif self.env_name == 'pong':
            self.game = Pong(width=self.width, height=self.height,MAX_SCORE=self.lives)
        elif self.env_name == 'puckworld':
            self.game = PuckWorld(width=self.width, height=self.height)
        elif self.env_name == 'raycastmaze':
            self.game = RaycastMaze(width=self.width, height=self.height)
        elif self.env_name == 'snake':
            self.game = Snake(width=self.width, height=self.height)
        elif self.env_name == 'waterworld':
            self.game = WaterWorld(width=self.width, height=self.height)
        elif self.env_name == 'monsterkong':
            self.game = MonsterKong()
        elif self.env_name == 'flappybird':
            self.game = FlappyBird(width=144, height=256)  # limitations on height and width for flappy bird
        else:
            raise TensorForceError('Unknown Game Environment.')

        if self.state_as_image:
            process_state = None
        else:
            #create a preprocessor to read the state dictionary as a numpy array
            def process_state(state):
                # ret_value = np.fromiter(state.values(),dtype=float,count=len(state))
                ret_value = np.array(list(state.values()), dtype=np.float32)
                return ret_value

        # make a PLE instance
        self.env = PLE(self.game,reward_values=self.rewards,fps=self.fps, frame_skip=self.frame_skip,
                       num_steps=self.ple_num_steps,force_fps=self.force_fps,display_screen=self.visualize,
                       state_preprocessor = process_state)
        #self.env.init()
        #self.env.act(self.env.NOOP) #game starts on black screen
        #self.env.reset_game()
        #self.env.act(self.env.NOOP)
        #self.env.act(self.env.NOOP)
        #self.env.act(self.env.NOOP)
        #self.env.act(self.env.NOOP)
        #self.env.reset_game()


        # setup gamescreen object
        if state_as_image:
            w, h = self.env.getScreenDims()
            self.gamescreen = np.empty((h, w, 3), dtype=np.uint8)
        else:
            self.gamescreen = np.empty(self.env.getGameStateDims(), dtype=np.float32)
        # if isRGB:
        #     self.gamescreen = np.empty((h, w, 3), dtype=np.uint8)
        # else:
        #     self.gamescreen = np.empty((h, w), dtype=np.uint8)

        # setup action converter
        # PLE returns legal action indexes, convert these to just numbers
        self.action_list = self.env.getActionSet()
        self.action_list = sorted(self.action_list, key=lambda x: (x is None, x))



    def __str__(self):
        return 'PygameLearningEnvironment({})'.format(self.env_name)

    def close(self):
        pygame.quit()
        self.env = None

    def reset(self):
        # if isinstance(self.gym, gym.wrappers.Monitor):
        #     self.gym.stats_recorder.done = True
        #env.act(env.NOOP) # need to take an action or screen is black
        # clear gamescreen
        if self.state_as_image:
            self.gamescreen = np.empty(self.gamescreen.shape, dtype=np.uint8)
        else:
            self.gamescreen = np.empty(self.gamescreen.shape, dtype=np.float32)
        self.env.reset_game()
        return self.current_state

    def execute(self, actions):

        #print("lives check in ple {}".format(self.env.lives()))
        #self.env.saveScreen("test_screen_capture_before_{}.png".format(self.total_time_steps))
        #lives_check = self.env.lives() #testing code

        ple_actions = self.action_list[actions]
        reward = self.env.act(ple_actions)
        state = self.current_state
        # testing code
        # self.env.saveScreen("test_screen_capture_after_{}.png".format(self.total_time_steps))
        # self.episode_time_steps += 1
        # self.episode_reward += reward
        # self.total_time_steps += 1
        # print("reward is {}".format(reward))
        # #if self.env.lives() != lives_check:
        # #    print('lives are different is game over? {}'.format(self.env.game_over()))
        # print('lives {}, game over {}, old lives {}'.format(self.env.lives(),self.env.game_over(),lives_check))

        if self.env.game_over():
            terminal = True
            # testing code
            self.reached_terminal += 1
            # print("GAME OVER reached terminal {}".format(self.reached_terminal))
            # print("episode time steps {}, episode reward {}".format(self.episode_time_steps,self.episode_reward))
            # self.episode_reward = 0
            # self.episode_time_steps = 0
            # print("total timesteps {}".format(self.total_time_steps))
        else:
            terminal = False

        return state, terminal, reward

    @property
    def actions(self):
        return dict(type='int', num_actions=len(self.action_list), names=self.action_list)

    # @property
    # def actions(self):
    #     return OpenAIGym.action_from_space(space=self.gym.action_space)

    #ALE implementation
    # @property
    # def actions(self):
    #     return dict(type='int', num_actions=len(self.action_inds), names=self.action_names)

    @property
    def states(self):
        return dict(shape=self.gamescreen.shape, type=float)

    @property
    def current_state(self):
        #returned state can either be an image or an np array of key components
        if self.state_as_image:
            self.gamescreen = self.env.getScreenRGB()
            # if isRGB:
            #     self.gamescreen = self.env.getScreenRGB()
            # else:
            #     self.gamescreen = self.env.getScreenGrayscale()
        else:
            self.gamescreen = self.env.getGameState()

        return np.copy(self.gamescreen)

    #ALE implementation
    # @property
    # def states(self):
    #     return dict(shape=self.gamescreen.shape, type=float)

    # @property
    # def current_state(self):
    #     self.gamescreen = self.ale.getScreenRGB(self.gamescreen)
    #     return np.copy(self.gamescreen)

    # @property
    # def is_terminal(self):
    #     if self.loss_of_life_termination and self.life_lost:
    #         return True
    #     else:
    #         return self.ale.game_over()
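
# Sketch (assumed usage, not in the original excerpt): a short random rollout using the
# key/value state representation; the reward dict follows PLE's reward_values keys.
env = PygameLearningEnvironment(
    game_name='catcher',
    rewards={"positive": 1.0, "negative": -1.0, "tick": 0.0, "loss": -5.0, "win": 5.0},
    state_as_image=False,
    visualize=False)
state = env.reset()
for _ in range(100):
    state, terminal, reward = env.execute(0)     # index into the sorted action_list
    if terminal:
        state = env.reset()
env.close()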
Example #7
class PLEEnv(gym.Env):
    metadata = {'render.modes': ['human', 'rgb_array']}

    def __init__(self,
                 prespecified_game=True,
                 game_name='MyCatcher',
                 display_screen=True,
                 rgb_state=False):
        # open up a game state to communicate with emulator
        import importlib
        if prespecified_game:
            game_module_name = ('ple.games.%s' % game_name).lower()
        else:
            game_module_name = ('domains.ple.%s' % game_name).lower()
        game_module = importlib.import_module(game_module_name)
        self.game = getattr(game_module, game_name)()
        self.rgb_state = rgb_state
        if self.rgb_state:
            self.game_state = PLE(self.game,
                                  fps=30,
                                  display_screen=display_screen)
        else:
            if prespecified_game:
                self.game_state = PLE(
                    self.game,
                    fps=30,
                    display_screen=display_screen,
                    state_preprocessor=process_state_prespecified)
            else:
                self.game_state = PLE(self.game,
                                      fps=30,
                                      display_screen=display_screen,
                                      state_preprocessor=process_state)
        self.game_state.init()
        self._action_set = self.game_state.getActionSet()
        self.action_space = spaces.Discrete(len(self._action_set))
        if self.rgb_state:
            self.state_width, self.state_height = self.game_state.getScreenDims()
            self.observation_space = spaces.Box(low=0,
                                                high=255,
                                                shape=(self.state_width,
                                                       self.state_height, 3))
        else:
            self.state_dim = self.game_state.getGameStateDims()
            self.observation_space = spaces.Box(low=0,
                                                high=255,
                                                shape=self.state_dim)
        self.viewer = None
        self.feature_bins = []
        if hasattr(self.game, 'feature_bins'):
            self.feature_bins = self.game.feature_bins

    def get_source_state(self, state):
        if hasattr(self.game, 'get_source_state'):
            return self.game.get_source_state(state)
        return None

    def get_uniform_state_weights(self):
        if hasattr(self.game, 'get_uniform_state_weights'):
            return self.game.get_uniform_state_weights()
        else:
            states = self.get_states()
            weights = np.ones(len(states))
            weights = [float(i) / sum(weights) for i in weights]
            return states, weights

    def generate_training_subset(self, percent_sim_data):
        if hasattr(self.game, 'generate_training_subset'):
            return self.game.generate_training_subset(percent_sim_data)

    def set_to_training_set(self):
        if hasattr(self.game, 'set_to_training_set'):
            return self.game.set_to_training_set()

    def set_to_testing_set(self):
        if hasattr(self.game, 'set_to_testing_set'):
            return self.game.set_to_testing_set()

    def get_states(self):
        if hasattr(self.game, 'states'):
            return self.game.states

    def _step(self, a):
        reward = self.game_state.act(self._action_set[a])
        state = self._get_state()
        terminal = self.game_state.game_over()
        return state, reward, terminal, {}

    def _get_image(self, game_state):
        image_rotated = np.fliplr(
            np.rot90(game_state.getScreenRGB(),
                     3))  # Hack to fix the rotated image returned by ple
        return image_rotated

    def _get_state(self):
        if self.rgb_state:
            return self._get_image(self.game_state)
        else:
            return self.game_state.getGameState()

    @property
    def _n_actions(self):
        return len(self._action_set)

    # returns the initial state after resetting the game
    def _reset(self):
        if self.rgb_state:
            self.observation_space = spaces.Box(low=0,
                                                high=255,
                                                shape=(self.state_width,
                                                       self.state_height, 3))
        else:
            self.observation_space = spaces.Box(low=0,
                                                high=255,
                                                shape=self.state_dim)
        self.game_state.reset_game()
        state = self._get_state()
        return state

    def _render(self, mode='human', close=False):
        if close:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None
            return
        img = self._get_image(self.game_state)
        if mode == 'rgb_array':
            return img
        elif mode == 'human':
            from gym.envs.classic_control import rendering
            if self.viewer is None:
                self.viewer = rendering.SimpleImageViewer()
            self.viewer.imshow(img)

    def _seed(self, seed):
        rng = np.random.RandomState(seed)
        self.game_state.rng = rng
        self.game_state.game.rng = self.game_state.rng

        self.game_state.init()
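
# Sketch (assumed usage, not in the original excerpt): PLEEnv follows the old gym API
# (_reset/_step), so the underscore methods are called directly here; 'Catcher' stands in
# for any prespecified PLE game, and process_state_prespecified is assumed to be defined
# elsewhere in the original module.
env = PLEEnv(prespecified_game=True, game_name='Catcher', display_screen=False)
obs = env._reset()
terminal = False
while not terminal:
    obs, reward, terminal, info = env._step(env.action_space.sample())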

Example #8
# shooting agent
agent = ShootAgent(p.getActionSet())

# init agent and game.
p.init()

# let's do a random number of NOOPs
for i in range(np.random.randint(0, max_noops)):
    reward = p.act(p.NOOP)

# start our training loop
for f in range(nb_frames):
    # if the game is over
    if p.game_over():
        p.reset_game()
        print('game over')

    (screen_width, screen_height) = p.getScreenDims()
    print(screen_width, screen_height)
    print(p.getGameStateDims())
    obs = p.getScreenRGB()
    from PIL import Image
    img = Image.fromarray(obs)
    img.show()
    # state = p.getGameState()
    break
    # print(state)
    # action = agent.pickAction(reward, obs)
    # reward = p.act(action)
#     print('score: {}'.format(reward))
Example #9
    ]).reshape(1, -1)  # end of the (truncated) process_state preprocessor: state fields flattened to shape (1, n)


game = Joust(display_screen=True)

p = PLE(game,
        fps=30,
        display_screen=False,
        state_preprocessor=process_state,
        force_fps=False)
p.init()
player1 = game.player1
player2 = game.player2
agent1 = DQNAgent(player1,
                  game.p1_actions,
                  p.getGameStateDims(),
                  log_level=logging.INFO)
agent2 = DQNAgent(player2,
                  game.p2_actions,
                  p.getGameStateDims(),
                  log_level=logging.INFO)

game.adjustRewards({
    "positive": 0.1,
    "tick": 0.001,
    "negative": -0.1,
    "win": 1,
    "loss": -1
})

nb_frames = 500
Example #10
            targetH = selectedPipeBotY - DELTA_H

        if (state[player_y] > targetH):
            action = flap

        return self.possibleActions[action]
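
# Sketch (assumption, not in the original excerpt): a self-contained version of the
# flap-when-below-target heuristic implied by the truncated method above; DELTA_H and the
# 0 = no-op / 1 = flap action ordering are assumptions.
DELTA_H = 10

def expert_policy(state, possible_actions, player_y, pipe_bottom_y):
    """Flap whenever the bird sits below the target height under the bottom pipe."""
    targetH = state[pipe_bottom_y] - DELTA_H     # aim a little above the bottom pipe
    action = 0                                   # do nothing
    if state[player_y] > targetH:                # pygame y grows downward, so larger y = lower
        action = 1                               # flap
    return possible_actions[action]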


def preprocessor(state):
    return np.array([state[k] for k in sorted(state.keys())])


game = flappy.FlappyClone(crazy=False)
env = PLE(game, display_screen=True, force_fps=True, fps=30,
          state_preprocessor=preprocessor)
env.init()
expertAgent = ExpertAgent(env.getActionSet(), env.getGameStateDims())

for e in range(1, 101):
    while True:
        if env.game_over(): # if the game is over, reset
            print("test {}, death at score: {}".format(e, game.getScore()))
            env.game.tick(1. / 2.)
            env.reset_game()
            break
        reward = env.act(expertAgent.getAction(env.getGameState()))
        print("score={:010.1f}".format(game.getScore()), end="\r")
        env.game.tick(FPS)